This R code includes the analyses and figures of the CPS1000 manuscript, written by Kevin D. Hofer and Junyan Lu.

1 Setup

library(knitr)
options(digits=3, width=80)
opts_chunk$set(echo=TRUE,tidy=FALSE,include=TRUE)

1.1 Load libraries

library(ggpmisc)
suppressMessages(library(RColorBrewer))
suppressMessages(library(ggrepel))
suppressMessages(library(cowplot))
suppressMessages(library(gridExtra))
suppressMessages(library(drc))
suppressMessages(library(tidyverse))
detach("package:dplyr") 
suppressMessages(library(dplyr))
library("survminer")
library("survival")
library("maxstat")
library("patchwork")
library("glmnet")
library("corrplot")
library("writexl")
library("ggplot2")
library("flextable")
library("ggpubr")
library("ComplexHeatmap")
library("circlize")
library("grid")
library("RColorBrewer")
library("ggplotify")
library("magrittr")
library("Rtsne")
library("paletteer")
library("ggtext")
library("ggh4x")
library("plotrix")
library("scales")
library("corrplot")
library("gridGraphics")
library("draw")
library("stringr")
library("ggsurvfit")
library("BloodCancerMultiOmics2017")
library("gtable")
library("statebins")
library("DESeq2")
library("apeglm")
library("fgsea")
library("msigdbr")
library("biomaRt")
library("org.Hs.eg.db")
library("ggtext")
library("flextable")
library("data.table")
library("readxl")




#maintain function of specific R packages
rename <- dplyr::rename
count  <- dplyr::count
filter <- dplyr::filter

1.2 Define figure themes

theme_figures <- theme_classic()+
  theme(plot.title = element_text(size=8, hjust=0.5, face="bold"),
        axis.text = element_text(color="black", size=8), 
        axis.title = element_text(size=8), 
        axis.line = element_line(size=0.5), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text = element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
theme_figures_angle45_x <- theme_classic()+
  theme(plot.title=element_text(size=8, hjust=0.5, face="bold"),
        axis.text.x = element_text(color="black", angle = 45, hjust=1, vjust=1, size=8), 
        axis.text.y = element_text(size=8), 
        axis.title = element_text(size=8), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text = element_text(size=8), 
        axis.line = element_line(size=0.5), 
        legend.key.size = unit(0.5, 'cm'))

theme_figures_facet <- theme_bw() +   
  theme(strip.text.y = element_text(size = 0), 
        strip.text.x = element_text(size=8),
        strip.background.y = element_rect(color=NA),
        axis.text = element_text(size=8, color="black"),
        axis.title = element_text(size=8, color="black"),
        panel.spacing = unit(0.15, "lines"), 
        legend.position = "none",
        panel.border = element_rect(color = "black", linewidth = 0.75, fill = NA),
        panel.grid = element_blank(),
        strip.background = element_rect(color = "black", linewidth = 0.75, fill = "grey90"),
        plot.title = element_text(hjust=0.5, size=8, face="bold"))

theme_figures_facet_angle60_x <- theme_bw() +   
  theme(strip.text.y = element_text(size = 0), 
        strip.text.x = element_text(size=8),
        strip.background.y = element_rect(color=NA),
        axis.title = element_text(size=8), 
        axis.text.y = element_text(size =8, color="black"),
        axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size=8, color = "black"),
        panel.spacing = unit(0.15, "lines"), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'),
        panel.border = element_rect(color = "black", linewidth = 0.75, fill = NA),
        strip.background = element_rect(color = "black", linewidth = 0.75, fill = "grey90"),
        plot.title = element_text(hjust=0.5, size=8, face="bold"), 
        strip.text = element_text(size = 8))

theme_figures_facet_angle45_x  <- theme_figures_facet +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

theme_figures_facet_angle45_x_legend <- theme_figures_facet_angle45_x +
  theme(legend.position = "right",
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))
  
theme_figures_border <- theme_bw() +
  theme(axis.text = element_text(color="black", size=8),
        axis.title = element_text(size=8), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'),
        plot.title = element_text(hjust=0.5, size=8, face="bold"), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.border = element_rect(color = 'black', 
                                    fill = NA, 
                                    size = 1))
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#set colors
#diagnosis
colors_diag <- setNames(c("#1F78B4", "#B2DF8A", "#A6CEE3", "grey", "#E31A1C", "#FB9A99", "#FF7F00", "#CAB2D6"), c("MCL", "CLL", "T-PLL", "other", "AML", "T-ALL", "B-ALL", "B-PLL"))

colors_diag_IGHV <- setNames(c("#1F78B4", "#B2DF8A", "#33A02C", "grey80", "#E31A1C", "#FB9A99", "#FF7F00", "#CAB2D6", "#A6CEE3"), c("MCL", "U-CLL", "M-CLL", "other", "AML", "T-ALL", "B-ALL", "B-PLL", "T-PLL"))

#pathways
colors_pathways_mod <- setNames(c("lightgrey", "#BC80BD", "#FCCDE5", "#B3DE69", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA", "#FFFFB3", "#8DD3C7"), c("other", "AKT/mTOR", "DNA damage response", "Chemotherapy" ,  "MAPK" , "B-cell receptor", "PI3K", "Stress pathway", "Bromodomain/PLK", "JAK/STAT"))

colors_pathways_mod2 <- setNames(c("lightgrey", "#FCCDE5", "#B3DE69", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA", "#FFFFB3", "#8DD3C7", "#A6CEE3", "#6DBD57", "#DE9E83", "#BC80BD"), c("other", "DNA damage response", "Chemotherapy" ,  "MAPK" , "B-cell receptor", "PI3K", "Stress pathway", "Bromodomain/PLK", "JAK/STAT", "Apoptosis", "Cell cycle control", "Histone methylation", "PI3K/AKT/mTOR"))

colors_cor_heatmap <- setNames(c("#BC80BD", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA"), c("AKT/mTOR", "MEK/ERK" , "B-cell receptor", "PI3K", "Stress pathway"))

#mode of action
colors_drug_type <- c("Chemo" = "#9a6a3d", "Kinase" = "#6A3D9A", "Other" = "#3d9a6a")
labels_drug_type <- c("Chemo" = "Chemotherapy", "Kinase" ="Kinase inhibitor", "Other" = "Other")

#genetics
colors_ddx3x <- setNames(c("#E78AC3", "grey60"), c(1, 0))

colors_del11q <- setNames(c("#E5C494", "grey60"), c(1, 0))

#longitudinal analysis
colors_treatment <- setNames(c("black", "salmon"), c("sample1_value", "sample2_value"))

#cox regression models
colors_cox <- setNames(c("#2171b5", "#cb181d"), c("TTT", "OS"))

colors_ibr_viab <- setNames(c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"), 
                            c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"))

1.3 Define functions

1.3.1 Functions for analyses

#function for t-test
tTest <- function(val, fac) {
  res <- t.test(val ~ fac, var.equal = TRUE)
  data.frame(p = res$p.value, 
             mean1 = res$estimate[[1]],
             mean2 = res$estimate[[2]],
             diff = res$estimate[[2]] - res$estimate[[1]])
}

#helper function for mean viability boxplots
create_boxplot <- function(data, diagnosis_filter, colors, title) {
  #order drugs by median viability
  drug_order <- data |> 
    filter(diagnosis == diagnosis_filter) |> 
    group_by(drug) |> 
    summarise(median = median(mean_viab)) |> 
    arrange(desc(median)) |> 
    pull(drug)
  
  #filter and reorder data
  plot_data <- data |> 
    filter(diagnosis == diagnosis_filter) |> 
    mutate(drug = factor(drug, levels = drug_order))
  
  #create plot
  plot_data |> 
    filter(diagnosis == diagnosis_filter) |> 
    ggplot(aes(x=mean_viab, y=drug, color=diagnosis))+ 
    geom_boxplot(alpha=0.8, outlier.shape=NA) +
    geom_jitter(alpha=0.2, size=0.2, width = 0.15)+
    scale_x_continuous(name ="Mean viability", breaks=seq(0,1.2, 0.2), limits=c(0,1.2))+
    theme_bw() +
    theme(
      axis.text = element_text(color = "black", size = 8),
      axis.title = element_text(size = 8),
      plot.title = element_text(size = 8, hjust = 0.5, face = "bold"),
      legend.position = "none",
      panel.border = element_rect(color = "black", linewidth = 1, fill = NA)) +
    labs(y = "", x = "Mean viability", title = title) +
    scale_color_manual(values = colors_diag_IGHV)
}

#sigmoid curve fitting
testF <- function(m0, m1) {
  Fstat <- ((sum(residuals(m0)^2) - sum(residuals(m1)^2))/(m0$df.residual-m1$df.residual))/(sum(residuals(m1)^2)/m1$df.residual)
  1 - pf(Fstat, m0$df.residual-m1$df.residual, m1$df.residual)
}

calcAUC <- function(m1, minConc =0, maxConc=15, n=100) {
  concSeq <- data.frame(conc = seq(minConc, maxConc, length.out = n))
  valueConc <- data.frame(viab = predict(m1, concSeq),
                          conc = concSeq[,1])
  valueConc <- valueConc[order(valueConc$conc),]
  areaTotal <- 0
  for (i in seq(nrow(valueConc)-1)) {
    areaEach <- (valueConc$viab[i] + valueConc$viab[i+1])*(valueConc$conc[i+1]-valueConc$conc[i])*0.5
    areaTotal <- areaTotal + areaEach
  }
  areaTotal/(valueConc[nrow(valueConc),]$conc-valueConc[1,]$conc)
}

calcParm <- function(df_input) {
  df <- as.data.frame(df_input)
  df$conc <- as.numeric(df$conc)
  df$viab <- as.numeric(df$viab)

  formula_obj <- viab ~ conc

  out <- tryCatch({
    m1 <- eval(substitute(
      drm(formula_obj,
          data = df,
          fct = LL2.3u(),
          robust = "tukey",
          lowerl = c(0, 0, NA),
          upperl = c(4, 1, NA)),
      list(formula_obj = formula_obj, df = df)
    ))

    fitRes <- m1$coefficients
    m0 <- lm(viab ~ 1, data = df)
    pred_vals <- predict(m1)
    r2 <- cor(pred_vals, df$viab, use = "pairwise.complete.obs")^2
    auc <- calcAUC(m1)

    tibble(
      HS = fitRes[[1]],
      Einf = fitRes[[2]],
      IC50 = fitRes[[3]],
      pF = testF(m0, m1),
      R2 = r2,
      AUC = auc
    )
  }, error = function(e) {
    message("Fit failed for a group: ", e$message)
    tibble(
      HS = NA_real_,
      Einf = NA_real_,
      IC50 = NA_real_,
      pF = NA_real_,
      R2 = NA_real_,
      AUC = NA_real_
    )
  })

  return(out)
}

#function to clean and combine multi-omics data
generateData <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {
  
  dataScale <- function(x, censor = NULL, robust = FALSE) {
    #function to scale different variables
    if (length(unique(na.omit(x))) == 2){
      #a binary variable, change to -0.5 and 0.5 for 1 and 2
      x - 0.5
    } else if (length(unique(na.omit(x))) == 3) {
      #catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
      (x - 1)/2
    } else {
      if (robust) {
        #continuous variable, centered by median and divied by 2*mad
        mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
        if (!is.null(censor)) {
          mScore[mScore > censor] <- censor
          mScore[mScore < -censor] <- -censor
        }
        mScore/2
      } else {
        mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
        if (!is.null(censor)) {
          mScore[mScore > censor] <- censor
          mScore[mScore < -censor] <- -censor
        }
        mScore/2
      }
    }
  }
  
  
  allResponse <- list() #list for storing response vector (drug viability)
  allExplain <- list() #list for storing explain matrices (multi-omics)
  
  for (drug in colnames(inclSet$drugs)) {
    y <- inclSet$drugs[,drug]
    
    #get overlapped samples for each dataset 
    overSample <- names(y)
    
    for (eachSet in inclSet) {
      overSample <- intersect(overSample,rownames(eachSet))
    }
    
    y <- dataScale(y[overSample], censor = censor, robust = robust)
    allResponse[[drug]] <- y
  }
  
  #generate explanatory variable table for each seahorse measurement
  expTab <- list()
  
  if ("gen" %in% names(inclSet)) {
    geneTab <- inclSet$gen[overSample,]
    #at least 3 mutated sample
    geneTab <- geneTab[, colSums(geneTab) >= 3]
    vecName <- sprintf("genetic(%s)", ncol(geneTab))
    expTab[[vecName]] <- apply(geneTab,2,dataScale)
  }
  
  if ("RNA" %in% names(inclSet)){
    
    #for PCA
    rnaPCA <- inclSet$RNA[overSample, ]
    colnames(rnaPCA) <- paste0("con.expression",colnames(rnaPCA), sep = "")
    expTab[["expression(20)"]] <- apply(rnaPCA,2,dataScale, censor = censor, robust = robust)
    
  }
  
  if ("meth" %in% names(inclSet)){
    methPCA <- inclSet$meth[overSample,]
    colnames(methPCA) <- paste("con.methylation",colnames(methPCA),sep = "")
    expTab[["methylation(20)"]] <- apply(methPCA[,1:20],2,dataScale, censor = censor, robust = robust)
  }
  
  if ("IGHV" %in% names(inclSet)) {
    IGHVtab <- inclSet$IGHV[overSample,,drop=FALSE]
    expTab[["IGHV(1)"]] <- apply(IGHVtab,2,dataScale)
  }
  
  if ("Mcluster" %in% names(inclSet)) {
    methTab <- inclSet$Mcluster[overSample,,drop=FALSE]
    colnames(methTab) <- "methylation cluster"
    expTab[["methCluster(1)"]] <- apply(methTab,2,dataScale)
  }
  
  if ("pretreated" %in% names(inclSet)){
    preTab <- inclSet$pretreated[overSample,,drop=FALSE]
    expTab[["pretreated(1)"]] <- apply(preTab,2,dataScale)
  }
  
  comboTab <- c()
  
  for (eachSet in names(expTab)){
    comboTab <- cbind(comboTab, expTab[[eachSet]])
  }
  vecName <- sprintf("all(%s)", ncol(comboTab))
  expTab[[vecName]] <- comboTab
  
  allExplain <- expTab
  
  if (onlyCombine) {
    #only return combined results, for feature selection
    allExplain <-expTab[length(expTab)]
  }
  
  return(list(allResponse=allResponse, allExplain=allExplain))
  
}

#function for multi-variant regression
runGlm <- function(X, y, method = "ridge", repeats=20, folds = 3) {
  modelList <- list()
  lambdaList <- c()
  varExplain <- c()
  coefMat <- matrix(NA, ncol(X), repeats)
  rownames(coefMat) <- colnames(X)
  
  if (method == "lasso"){
    alpha = 1
  } else if (method == "ridge") {
    alpha = 0
  }
  
  for (i in seq(repeats)) {
    if (ncol(X) > 2) {
      res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian", 
                       nfolds = folds, alpha = alpha, standardize = FALSE)
      lambdaList <- c(lambdaList, res$lambda.min)
      modelList[[i]] <- res
      
      coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
      coefMat[,i] <- coefModel
      
      #calculate variance explained
      y.pred <- predict(res, s = "lambda.min", newx = X)
      varExp <- cor(as.vector(y),as.vector(y.pred))^2
      varExplain[i] <- ifelse(is.na(varExp), 0, varExp) 
      
   
    } else {
      fitlm<-lm(y~., data.frame(X))
      varExp <- summary(fitlm)$r.squared
      varExplain <- c(varExplain, varExp)
      
    }
    
  }
  list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}

#function to calculate pair-wise mean viability differences
create_mean_differences <- function(treatments_df, viab_df) {
  # Get drug columns
  drug_cols <- setdiff(names(viab_df), c("patientID", "sampleID", "sampleDate"))
  
  # Get sample1 values
  sample1_long <- treatments_df %>%
    left_join(viab_df, by = c("sample1" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, sample1, sample2, status, therapy, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample1_value")
  
  # Get sample2 values
  sample2_long <- treatments_df %>%
    left_join(viab_df, by = c("sample2" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample2_value")
  
  # Combine and calculate differences
  differences_long <- sample1_long %>%
    left_join(sample2_long, by = c("patientID", "pair", "drug")) %>%
    mutate(
      difference = sample2_value - sample1_value,
      fold_change = sample2_value / sample1_value,
      percent_change = ((sample2_value - sample1_value) / sample1_value) * 100
    )
  
  return(differences_long)
}


#function to calculate pair-wise single concentration viability differences
create_conc_differences <- function(treatments_df, viab_df) {
  # Get drug columns
  drug_cols <- setdiff(names(merged_table_wide), c("patientID", "sampleID", "sampleDate", "conc_index"))
  
  # Get sample1 values
  sample1_long <- treatments_df %>%
    left_join(viab_df, by = c("sample1" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, sample1, sample2, status, therapy, conc_index, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample1_value")
  
  # Get sample2 values
  sample2_long <- treatments_df %>%
    left_join(viab_df, by = c("sample2" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, conc_index, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample2_value")
  
  # Combine and calculate differences
  differences_long <- sample1_long %>%
    left_join(sample2_long, by = c("patientID", "pair", "drug", "conc_index")) %>%
    mutate(
      difference = sample2_value - sample1_value,
      fold_change = sample2_value / sample1_value,
      percent_change = ((sample2_value - sample1_value) / sample1_value) * 100
    )
  
  return(differences_long)
}

#function for univariate cox regression
com_uni <- function(response, time, endpoint, scale =FALSE) {  
  
  if (scale) {
    #calculate z-score
    response <- (response - mean(response, na.rm = TRUE))/sd(response, na.rm=TRUE)
  }
  
  tryCatch({
    surv <- coxph(Surv(time, endpoint) ~ response) 
    resTab <- tibble(p = summary(surv)[[7]][,5], 
                     HR = summary(surv)[[7]][,2], 
                     lower = summary(surv)[[8]][,3], 
                     higher = summary(surv)[[8]][,4])
  }, error = function(err) {
    resTab <- tibble(p = NA, 
                     HR = NA, 
                     lower = NA, 
                     higher = NA)
  })
  
}

#function for multivariate cox regression
com <- function(response, ighv, TP53_del17p, age, time, endpoint, scale =FALSE) {  
  
  if (scale) {
    #calculate z-score
    response <- (response - mean(response, na.rm = TRUE))/sd(response, na.rm=TRUE)
    age <- (age - mean(age, na.rm = TRUE))/sd(age, na.rm=TRUE)
  }
  
  tryCatch({
    surv <- coxph(Surv(time, endpoint) ~ response+ighv+TP53_del17p+age) 
    resTab <- tibble(variable = c("response", "ighv", "TP53_del17p", "age"),
                     p = summary(surv)[[7]][,5], 
                     HR = summary(surv)[[7]][,2], 
                     lower = summary(surv)[[8]][,3], 
                     higher = summary(surv)[[8]][,4])
  }, error = function(err) {
    resTab <- tibble(variable = c("response", "ighv", "TP53_del17p", "age"),
                     p = NA, 
                     HR = NA, 
                     lower = NA, 
                     higher = NA)
  })
  
}

1.3.2 Functions for graphs and tables

#function for the lasso heatmap plot
lassoPlot <- function(lassoOut, cleanData, freqCut = 1, coefCut = 0.01, setNumber = "last") {
  plotList <- list()
  if (setNumber == "last") {
    setNumber <- length(lassoOut[[1]])
  } else {
    setNumber <- setNumber
  }
  
  for (seaName in names(lassoOut)) {
    #for the barplot on the left of the heatmap
    barValue <- rowMeans(lassoOut[[seaName]][[setNumber]]$coefMat)
    freqValue <- rowMeans(abs(sign(lassoOut[[seaName]][[setNumber]]$coefMat)))
    barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] # a certain threshold
    barValue <- barValue[order(barValue)]
    if(length(barValue) == 0) {
      plotList[[seaName]] <- NA
      next
    }
    
    #for the heatmap and scatter plot below the heatmap
    allData <- cleanData$allExplain[[setNumber]]
    seaValue <- cleanData$allResponse[[seaName]]*2 #back to Z-score
    
    tabValue <- allData[, names(barValue),drop=FALSE]
    ord <- order(seaValue)
    seaValue <- seaValue[ord]
    tabValue <- tabValue[ord, ,drop=FALSE]
    sampleIDs <- rownames(tabValue)
    tabValue <- as.tibble(tabValue)
    
    #change scaled binary back to catagorical
    for (eachCol in colnames(tabValue)) {
      if (strsplit(eachCol, split = "[.]")[[1]][1] != "con") {
        tabValue[[eachCol]] <- as.integer(as.factor(tabValue[[eachCol]]))
      }
      else {
        tabValue[[eachCol]] <- tabValue[[eachCol]]*2 #back to Z-score
      }
    }
    
    tabValue$Sample <- sampleIDs
    #Mark different rows for different scaling in heatmap
    matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
    matValue$Type <- "mut"
    
    #For continuious value
    matValue$Type[grep("con.",matValue$Var)] <- "con"
    
    #for methylation_cluster
    matValue$Type[grep("cluster",matValue$Var)] <- "meth"
    
    #change the scale of the value, let them do not overlap with each other
    matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
    matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
    
    
    #color scale for viability
    idx <- matValue$Type == "con"
    
    myCol <- colorRampPalette(c('dark red','white','dark blue'), 
                              space = "Lab")
    if (sum(idx) != 0) {
      matValue[idx,]$Value = round(matValue[idx,]$Value,digits = 2)
      minViab <- min(matValue[idx,]$Value)
      maxViab <- max(matValue[idx,]$Value)
      limViab <- max(c(abs(minViab), abs(maxViab)))
      scaleSeq1 <- round(seq(-limViab, limViab,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    } else {
      scaleSeq1 <- round(seq(0,1,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    }
    
    
    
    #change continues measurement to discrete measurement
    matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
    
    #change order of heatmap
    names(barValue) <-  gsub("con.", "", names(barValue))
    matValue$Var <- gsub("con.","",matValue$Var)
    matValue$Var <- factor(matValue$Var, levels = names(barValue))
    matValue$Sample <- factor(matValue$Sample, levels = names(seaValue))
    #plot the heatmap
    p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + geom_tile(aes(fill=Value), color = "gray") + 
      theme_bw() + scale_y_discrete(expand=c(0,0)) + 
      theme(axis.text.y=element_text(hjust=0, size=8, color="black"), axis.text.x=element_blank(), 
            axis.ticks=element_blank(), panel.border=element_rect(colour="gainsboro"),  
            plot.title=element_text(size=8, color="black", face="bold"), 
            panel.background=element_blank(), panel.grid.major=element_blank(), 
            panel.grid.minor=element_blank()) + 
      xlab("patients") + ylab("") + scale_fill_manual(name="Mutated", values=c(color4viab, `11`="gray96", `12`='black', `21`='#DEEBF7', `22`='#9ECAE1',`23` = '#3182BD'),guide=FALSE) + ggtitle(seaName)
    
    #Plot the bar plot on the left of the heatmap
    barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
    
    p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) + 
      geom_bar(stat="identity", fill="lightblue", position = "identity", width=.8, alpha=0.75) + 
      theme_bw() + geom_hline(yintercept=0, size=0.5) + scale_x_discrete(expand=c(0,0.5)) + 
      scale_y_continuous(expand=c(0,0)) + coord_flip(ylim=c(min(barValue),max(barValue))) + 
      theme(panel.grid.major=element_blank(), panel.background=element_blank(), axis.ticks.y = element_blank(),
            panel.grid.minor = element_blank(), axis.text=element_text(size=8, color="black"), panel.border=element_blank(), axis.line.x = element_line(color="black", size=0.5))+
      xlab("") + ylab("") + geom_vline(xintercept=c(0.5), color="black")
    
    #Plot the scatter plot under the heatmap
    
    # scatterplot below
    scatterDF = data.frame(X=factor(names(seaValue), levels=names(seaValue)), Y=seaValue)
    
    p3 <- ggplot(scatterDF, aes(x=X, y=Y)) + geom_point(shape=21, fill="grey80", colour="black", size=1.2) + theme_bw() + 
      theme(panel.grid.minor=element_blank(), panel.grid.major.x=element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=8, color="black"), panel.border=element_rect(colour="dimgrey", size=0.1), panel.background=element_rect(fill="gray96"))
    
    #Scale bar for continuous variable
    
    Vgg = ggplot(data=data.frame(x=1, y=as.numeric(names(color4viab))), aes(x=x, y=y, color=y)) + geom_point() + 
      scale_color_gradientn(name="Z-score", colours =color4viab) + theme(legend.title=element_text(size=12), legend.text=element_text(size=10))
    
    #Assemble all the plots togehter
    
    # construct the gtable
    wdths = c(1.5, 0.2, 1.3*ncol(matValue), 0.7, 1)
    hghts = c(0.1, 0.0010*nrow(matValue), 0.16, 0.5)
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    
    ## make grobs
    gg1 = ggplotGrob(p1)
    gg2 = ggplotGrob(p2)
    gg3 = ggplotGrob(p3)
    gg4 = ggplotGrob(Vgg)
    
    ## fill in the gtable
    gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 2, 1) # add barplot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 2, 3) # add heatmap
    gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 3) #add title to plot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 4, 3) # add scatterplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 3, 1) # y axis for barplot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 4, 2) # y axis for scatter plot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 2, 4) # variable names
    #gt = gtable_add_grob(gt, gtable_filter(gg4, "guide-box"), 2, 5) # scale bar for continous variables
    
    
    #plot
    #grid.draw(gt)
    plotList[[seaName]] <- gt
  }
  return(plotList)
}


#function for supplementary tables
stab_theme <- function(ft) {
  ft |>
    theme_alafoli() |>
    autofit() |> 
    fontsize(size = 8, part = "header") |> 
    fontsize(size = 8, part = "body") |> 
    padding(padding.top = 3, padding.bottom = 3, part = "all") |> 
    align(align = "left", part = "body") |>  
    align(align = "left", part = "header") |>
    bold(part = "header") |>
    width(j = 1, width = 1.25) |>
    width(j = 2, width = 3.5) |>
    width(j = 3, width = 0.75) |>
    line_spacing(space = 1, part = "all") |> 
    color(color = "black", part = "all")
}

2 Load data

2.1 File path

# Modify according to directory

#filepath <-"../../"
filepath <- "~/Dropbox/Medizin/Forschung/Aktuelle Forschungsprojekte/CLL CPS1000/figures/github/"

2.2 Load processed CPS1000 data

load(paste0(filepath,"data/CPS1000_data.RData"))

Description of datasets:
- screen: processed DMSO-normalized ex vivo viability measurements on the first sample of each patient used in the analysis - screen_allSamples: as above, but including all measurements - drugs: list of drugs used in the screen - patients: list of patients with corresponding diagnoses - metadata: annotation of genetic aberrations - treatments: treatment contexts for the longitudinal analysis of paired samples - survival: overall survival and time-to-treatment data - meth: Methylation-array data - rna: RNA-seq data - ddx3x (table, cps1000, literature): data on DDX3X mutations

3 Drugs

3.1 Drug overview (Fig. 2A)

#prepare data
drugs <- drugs |> as.data.frame()
as.factor(drugs$Pathway)
##  [1] MYC                   Apoptosis             PI3K/AKT/mTOR        
##  [4] EGFR                  Histone methylation   JAK/STAT             
##  [7] DNA damage response   PI3K/AKT/mTOR         FGFR                 
## [10] DNA damage response   ITK                   ALK                  
## [13] Chemotherapy          MAPK                  Chemotherapy         
## [16] Wnt                   MAPK                  ABL (BCR)            
## [19] Chemotherapy          PI3K/AKT/mTOR         Histone methylation  
## [22] Chemotherapy          HGF                   Stress pathway       
## [25] B-cell receptor       PI3K/AKT/mTOR         ABL (BCR)            
## [28] TLR                   MAPK                  DNA damage response  
## [31] MEN1                  Chemotherapy          MEN1                 
## [34] PKC                   PI3K/AKT/mTOR         TLR                  
## [37] DNA damage response   DNA damage response   Stress pathway       
## [40] B-cell receptor       Bromodomain           Cell cycle control   
## [43] Histone deacetylation Cytoskeleton          B-cell receptor      
## [46] Cytokine receptor     PI3K/AKT/mTOR         DNA damage response  
## [49] JAK/STAT              MAPK                  MAPK                 
## [52] Nuclear export        DNA damage response   Histone methylation  
## [55] Cytokine receptor     JAK/STAT              Cell cycle control   
## [58] MAPK                  Apoptosis             Apoptosis            
## [61] Hedgehog              PLK                   MAPK                 
## 28 Levels: ABL (BCR) ALK Apoptosis B-cell receptor ... Wnt
Fig2a <- ggplot(drugs, aes(x = factor(Pathway))) + 
geom_bar(aes(fill = Type)) + 
scale_fill_brewer(palette = "PuRd",  direction = 1) +
labs(y="Number of drugs", x="", fill="Drug type") +
theme_figures_angle45_x +
scale_y_continuous(expand = c(0.01,0), breaks=c(1,2,3,4,5,6,7,8)) +
scale_x_discrete(expand = c(0.025,0)) +
theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig2a

ggsave(path=(paste0(filepath,"figures")), filename = "Fig2a_drug_compounds.svg")
## Saving 18 x 9 in image

3.2 Drug list

#prepare data
drugs$Drug <- drugs$Drug |> 
  gsub("\\.", "-", x = _) |>
  gsub("D4476", "D 4476", x = _) |>
  gsub("IRAK1/4 Inhibitor I", "IRAK-1/4 Inhibitor I", x = _) |> 
  gsub("SB-203580", "SB 203580", x = _)

drugs$Drug <- sort(drugs$Drug) 
drug_list <- drugs |> 
  dplyr::select(Drug, Target, Pathway) #add supplier

#drug overview
drugs$Drug
##  [1] "10058-F4"             "A-1210477"            "A-674563"            
##  [4] "Afatinib"             "AMI-1"                "AZD1208"             
##  [7] "AZD6738"              "AZD8055"              "BGJ398"              
## [10] "BML-277"              "BMS-509744"           "Ceritinib"           
## [13] "Cisplatin"            "Cobimetinib"          "Cytarabine"          
## [16] "D 4476"               "Dabrafenib"           "Dasatinib"           
## [19] "Doxorubicine"         "Duvelisib"            "EPZ-6438"            
## [22] "Fludarabine"          "Foretinib"            "Ganetespib"          
## [25] "Ibrutinib"            "Idelalisib"           "Imatinib"            
## [28] "IRAK-1/4 Inhibitor I" "JNK-IN-8"             "KU-55933"            
## [31] "Menin-MLL Inhibitor"  "Methotrexate"         "MI-503"              
## [34] "Midostaurin"          "MK-2206"              "Motolimod"           
## [37] "Nutlin-3a"            "Olaparib"             "Onalespib"           
## [40] "ONO-4059"             "OTX015"               "Palbociclib"         
## [43] "Panobinostat"         "PF-3758309"           "PRT062607"           
## [46] "Quizartinib"          "Rapamycin"            "RO5963"              
## [49] "Ruxolitinib"          "SB 203580"            "SCH772984"           
## [52] "Selinexor"            "Selisistat"           "SGC 0946"            
## [55] "Sunitinib"            "Tofacitinib"          "Tozasertib"          
## [58] "Trametinib"           "TW-37"                "Venetoclax"          
## [61] "Vismodegib"           "Volasertib"           "ZM 336372"
#create table
stab3 <- flextable(drug_list) # |> stab_theme()

ft_grob <- gen_grob(stab3, fit = "width", scaling = "min", hjust = 0)

STab3 <- ggplot() +
  annotation_custom(ft_grob) +
  theme_void() +
  theme(plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm"))

ggsave(width=21, height=29.7, units = "cm", path=(paste0(filepath,"tables")), filename = "STab3_drugs_and_targets.svg")

4 Patient characteristics

4.1 Diagnoses (Fig. 2C)

#prepare data
patients <- patients |> 
  filter(diagnosis != "") |> 
  as.data.frame()

patients$cohort <- patients$cohort |> 
  str_replace("IC50", "IC50 + CPS1000") |>
  as.factor()
  
Fig2c <- ggplot(patients, aes(x = forcats::fct_infreq(diagnosis))) +
  geom_bar(aes(fill = cohort)) + 
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size=2.5) +
  scale_fill_brewer(palette = "Greens", direction =-1)+
  labs(y="Number of patients", x="", fill="Drug screen")+
  theme_figures_angle45_x +
  scale_y_continuous(expand = c(0.02,0), breaks=seq(0,300,50), limits = c(0,300))+
  scale_x_discrete(expand = c(0.04,0)) +
  theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig2c

ggsave(path=(paste0(filepath,"figures")), filename = "Fig2c_disease_distribution.svg")
## Saving 18 x 9 in image

4.2 Longitudinal data (Fig. 2D)

#prepare data
treatments$sample1 <- gsub("-[1234]$", "", treatments$sample1)
treatments$sample2 <- gsub("-[1234]$", "", treatments$sample2)

length(unique(treatments$patientID))
## [1] 110
#merge data
sample_date <- survival[,1:3]
merged_table <- merge(treatments, sample_date, by.x="sample1", by.y="sampleID", all.x=TRUE) |> 
  merge(patients[,c("patientID", "diagnosis")], by="patientID") |> 
  dplyr::rename(sampleDate1 = sampleDate)

#add date for 2. sampling and calculate time interval
merged_table <- merge(sample_date, merged_table, by.x="sampleID", by.y="sample2", ) |> 
  dplyr::rename(sample2 = sampleID, sampleDate2 = sampleDate) |> 
  dplyr::select(c(patientID, sample1, sample2, sampleDate1, sampleDate2, diagnosis)) |> 
  mutate(diff = sampleDate2-sampleDate1)

#plot
merged_table$diff <- as.numeric(merged_table$diff)
summary(merged_table$diff)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       7     224     515     578     884    1757
Fig2d <- ggplot(merged_table, aes(x="", y=diff)) +
  geom_boxplot(color="black", linewidth=0.5, outliers = FALSE, width=0.4) +
  geom_jitter(width = 0.1, alpha = 0.75, size=1, color="#74c476")+ 
  theme_figures+
  labs(x = "", y = "Days between samples")+
  theme(axis.ticks.x = element_blank())+
  scale_y_continuous(expand = c(0.02,0), breaks=c(seq(0,1750,500))) +
  theme(plot.margin = margin(t=1, r=1, l=1, b=1, unit="cm"))
Fig2d

ggsave(path=(paste0(filepath,"figures")), filename = "Fig2d_interval_samples.svg")
## Saving 3 x 6 in image

4.3 Oncoprint of genetic alterations in the CLL cohort (Fig. 2E)

#prepare data
#remove del5IgH
oncoprint_data <- merge(patients, metadata, by.x="patientID", by.y="Patient.ID") |> 
  filter(diagnosis.x == "CLL") |> 
  dplyr::select(!del5IgH)

#remove the first two columns (IGHV.status and Methylation_Cluster)
matrix_data <- as.matrix(oncoprint_data[, -c(1:13)])
rownames(matrix_data) <- oncoprint_data$patientID 
genetic_variations <- t(matrix_data)

#exclude aberrations with less than 8 events
genetic_variations <- data.matrix(genetic_variations)

genetic_variations_num <- matrix(as.numeric(genetic_variations), ncol = 275)
## Warning in matrix(as.numeric(genetic_variations), ncol = 275): NAs introduced
## by coercion
rownames(genetic_variations_num) <- rownames(genetic_variations)
colnames(genetic_variations_num) <- colnames(genetic_variations)

genetic_variations_num <- as.data.frame(genetic_variations_num)
genetic_variations_num$count <- rowSums(!(is.na(genetic_variations_num) | genetic_variations_num == 0)) 

genetic_variations_num <- genetic_variations_num |> 
  filter(count >7)
  
#re-adjust matrix für heatmap
genetic_variations_num[genetic_variations_num==0]<-""
genetic_variations_num[is.na(genetic_variations_num)]<-""

genetic_variations_red <- dplyr::select(genetic_variations_num, !count)
genetic_variations_red <- as.matrix(genetic_variations_red)

#annotations
#annotation for IGHV status
ighv_anno <- oncoprint_data[,"IGHV.status"]
ighv_anno <- ighv_anno %>% replace(is.na(.), "unknown")
ighv_colors <- colorRampPalette(brewer.pal(9, "Set1"))(length(unique(ighv_anno)))
annotation_colors <- setNames(ighv_colors, unique(ighv_anno))

#annotation for Methylation
meth_anno <- oncoprint_data[,"Methylation_Cluster"]
meth_anno <- factor(meth_anno, levels = c("HP", "IP","LP", "unknown"))
meth_anno <- meth_anno %>% replace(is.na(.), "unknown")
meth_colors <- colorRampPalette(brewer.pal(8, "Set2"))(length(unique(meth_anno)))
annotation_colors_meth <- setNames(meth_colors, unique(meth_anno))

#annotation for center
center_anno <- oncoprint_data[,"project"]
center_anno <- factor(center_anno, levels = c("HD", "HD-DNA", "H68", "MIR34a", "FB", "NOV", "MDACC", "MUN", "HCL_HD_Andrulis", "OX", "BARC", "ZCH_2"))
center_colors <- colorRampPalette(brewer.pal(3, "Paired"))(length(unique(center_anno)))

#change center label
center_anno <- fct_recode(center_anno,
"Zürich" = "ZCH_2",
"Barcelona" = "BARC",
"Heidelberg" = "HD")
annotation_colors_center <- setNames(center_colors, unique(center_anno))

#change IGHV label
ighv_anno <- fct_recode(ighv_anno,
"mutated" = "M",
"unmutated" = "U")
annotation_colors <- setNames(ighv_colors, unique(ighv_anno))

ha <- HeatmapAnnotation(Center = center_anno, IGHV = ighv_anno, Methylation = meth_anno, annotation_name_gp = gpar(fontsize = 8), col = list(Center =  annotation_colors_center, IGHV = annotation_colors, Methylation = annotation_colors_meth), annotation_legend_param = list(
    Center = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8)
    ),
    IGHV = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8)
    ),
    Methylation = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8)
    )))

heatmap_legend_param = list(title="Alteration", at = c("1"), title_gp = gpar(fontsize = 8), labels_gp = gpar(fontsize = 8))

#create the oncoprint
oncoprint <- oncoPrint(
    genetic_variations_red,
    alter_fun = list(
      background = function(x, y, w, h) {
        grid.rect(x, y, w, h, gp = gpar(fill = "#F8F8F8", col = "white"))
      },
      "1" = function(x, y, w, h) grid.rect(x, y, w, h, gp = gpar(fill = "black", col = "white")), col = "white"),
    col = c("1" = "black"), na_col = "white",
    remove_empty_rows = FALSE,
    remove_empty_columns = FALSE,
    row_names_gp = gpar(fontsize = 8),
    column_names_gp = gpar(fontsize = 8), 
    row_names_side = "left", pct_side = "right", pct_digits = 1,
    right_annotation = rowAnnotation(row_barplot = anno_oncoprint_barplot(c("1"), border = FALSE, height = unit(4, "cm"), axis_param = list(side = "bottom", labels_rot = 0))), 
    pct_gp = gpar(fontsize = 8), top_annotation = ha, heatmap_legend_param = heatmap_legend_param)
## All mutation types: 1.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.
draw(oncoprint, heatmap_legend_side = "left", annotation_legend_side = "right", show_heatmap_legend = FALSE)

Fig2e <- grid.grabExpr(draw(oncoprint, heatmap_legend_side = "left", annotation_legend_side = "right", show_heatmap_legend = FALSE))

ggsave(Fig2e, path=(paste0(filepath,"figures")), filename = "Fig2e_CLL_genetic_landscape.svg")
## Saving 12 x 8 in image

5 Drug response

5.1 Comparison with previous data sets (Fig. 2B)

## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).

## Saving 18 x 6 in image
## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).

5.2 Drug response patterns (Fig. 3A)

#cap supravital viability results (cut-off <1.2)
screen_long$viability[screen_long$viability>1.2] <- 1.2
screen_long$viability[screen_long$viability<0] <- 0

#show diseases separately with at least 10 samples
pat_nr_per_disease <- patients |> 
  group_by(diagnosis) |> 
  summarize(n_pat = n()) |> 
  arrange(desc(n_pat))

diagnoses_show <- pat_nr_per_disease |> 
  filter(n_pat >=10) |> 
  pull(diagnosis)

df_tsne <- patients[,c("patientID", "diagnosis")] |> 
merge(screen, by="patientID") |> 
  dplyr::select(-patientID, -sampleID) |> 
  mutate(diagnosis= case_when(diagnosis %in% diagnoses_show ~ diagnosis,
                               TRUE ~ "other"))

#split dataframe in measures and subsetting
df_meas <- df_tsne[, !names(df_tsne) %in% "diagnosis"]
df_subset <- df_tsne[, names(df_tsne) %in% "diagnosis"]

#define order
order <- c("AML", "B-ALL", "B-PLL", "CLL", "MCL", "T-ALL", "T-PLL", "other")
df_subset <- factor(df_tsne$diagnosis, levels = order)

#t-sne
set.seed(12)
tsne_results <- Rtsne(df_meas, perplexity=25, check_duplicates = FALSE)

#plot
tsne_plot <- data.frame(
  x = tsne_results$Y[,1], 
  y = tsne_results$Y[,2],
  diagnosis = df_subset
)

Fig3a <- ggplot(tsne_plot) + 
  geom_point(aes(x=x, y=y, color=diagnosis), size=2, alpha=0.8) +
  theme_figures+
  labs(color="Diagnosis")+
  scale_color_manual(values=colors_diag)+
  theme(plot.margin = margin(t=2, b=2, r=1, l=1, unit="cm"))
Fig3a

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3a_tsne.svg")
## Saving 6 x 6 in image

5.3 Mean viability by drug and disease

5.3.1 Overall Heatmap (Fig. 3B)

#prepare data
screen_long_topdiag <- screen_long |> 
  mutate(diagnosis = case_when(diagnosis %in% diagnoses_show ~ diagnosis, 
                               TRUE ~ "other"))

#calculate mean viability
screen_means <- screen_long_topdiag |> 
  group_by(patientID, diagnosis, drug) |> 
    summarize(mean_viab = mean(viability))
## `summarise()` has grouped output by 'patientID', 'diagnosis'. You can override
## using the `.groups` argument.
screen_means
## # A tibble: 28,665 × 4
## # Groups:   patientID, diagnosis [455]
##    patientID diagnosis drug      mean_viab
##    <chr>     <chr>     <chr>         <dbl>
##  1 P0001     CLL       10058-F4      0.998
##  2 P0001     CLL       A-1210477     0.983
##  3 P0001     CLL       A-674563      0.898
##  4 P0001     CLL       AMI-1         1.00 
##  5 P0001     CLL       AZD1208       0.871
##  6 P0001     CLL       AZD6738       0.980
##  7 P0001     CLL       AZD8055       0.912
##  8 P0001     CLL       Afatinib      0.863
##  9 P0001     CLL       BGJ398        1.04 
## 10 P0001     CLL       BML-277       1.06 
## # ℹ 28,655 more rows
#merge with metadata for IGHV status in CLL, drop CLL without IGHV annotation
ighv <- metadata[, names(metadata) %in% c("Patient.ID", "IGHV.status")]

screen_means_ighv <- merge(screen_means, ighv, by.x = "patientID", by.y= "Patient.ID") |>     
  mutate(diagnosis = case_when(IGHV.status == "U" ~ "U-CLL",
                        IGHV.status == "M" ~ "M-CLL",
                        TRUE ~ diagnosis)) |> 
  filter (diagnosis != "CLL") |> 
  dplyr::select(-IGHV.status) 

#prepare for heatmap
df <- screen_means_ighv |> 
  dplyr::select(-diagnosis) |> 
  pivot_wider(names_from = patientID, values_from = mean_viab)

df_use <- df |> 
  dplyr::select(-drug) |> 
  colnames()

df_subset <- df |> 
  dplyr::select(-drug)

#diagnoses
df_diagnoses <- screen_means_ighv |> 
  filter(drug == "10058-F4") |> 
  dplyr::select(patientID, diagnosis)

#add pathways
df_pathway <- drugs[, names(drugs) %in% c("Drug", "Pathway_mod")]

df_combined <- merge(df_pathway, df, , by.x = "Drug", by.y = "drug")

#order pathways and diagnoses
df_combined$Pathway_mod <- factor(df_combined$Pathway_mod,
    levels = c("AKT/mTOR", "B-cell receptor", "Bromodomain/PLK", "Chemotherapy", 
               "DNA damage response", "JAK/STAT", "MAPK", "PI3K", "Stress pathway", "other"))
               
df_diagnoses$diagnosis <- factor(df_diagnoses$diagnosis,
    levels = c("AML", "B-ALL", "B-PLL", "M-CLL", "MCL", "T-ALL", "T-PLL", "U-CLL", "other"))

#scale
x <- data.frame(df_subset)
y <- data.frame(t(scale(t(x))))

#create matrix
rownames(y) = df$drug
colnames(y) = colnames(df_subset)
z <- as.matrix(y)

#annotations
color_fun_reversed <- colorRamp2(c(4, median(z), -4), c("red", "white", "blue"))

ha = HeatmapAnnotation(
  Diagnosis = df_diagnoses$diagnosis, 
  annotation_name_gp = gpar(fontsize = 8), 
  col = list(Diagnosis = colors_diag_IGHV),
  annotation_legend_param = list(
    Diagnosis = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8),
      nrow = 2
    )
  )
)

pway <- as.data.frame(df_combined[,1:2])
pway_ordered = pway[match(rownames(z), pway$Drug), ]

ha2 = rowAnnotation(
  Pathway = pway_ordered$Pathway_mod, 
  annotation_name_gp = gpar(fontsize = 8), 
  col = list(Pathway = colors_pathways_mod),
  annotation_legend_param = list(
    Pathway = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8),
      nrow = 2
    )
  )
)

#heatmap
h1 <- Heatmap(z, name = "Z-score", show_row_names = TRUE, show_column_names=FALSE, row_names_gp = gpar(fontsize = 8), width = ncol(z)*unit(0.25, "mm"), height = nrow(z)*unit(3.5, "mm"), col = color_fun_reversed, top_annotation = ha, left_annotation = ha2, show_column_dend = FALSE, row_km = 4, row_title = NULL, 
show_parent_dend_line = FALSE, heatmap_legend_param = list(direction = "horizontal", title_gp = gpar(fontsize = 8, fontface = "bold"), labels_gp = gpar(fontsize = 8)))

set.seed(1234)

#set spacing to legend
ht_opt(HEATMAP_LEGEND_PADDING = unit(1.5, "cm"))

Fig3b <- grid.grabExpr(draw(h1, merge_legends = TRUE, heatmap_legend_side = "bottom"))
Fig3b
## gTree[GRID.gTree.2263]
# Reset options
ht_opt(RESET = TRUE)  

#save?

5.3.2 Drug-specific heatmaps (Fig. S2)

5.3.3 Overview of drug effects (Fig. 3C)

#create diagnosis-specific means
screen_mean_diag <- screen_long |> 
  group_by(diagnosis, drug) |> 
  summarize(mean_diag = mean(viability)) |> 
  filter(diagnosis %in% diagnoses_show) |> 
  pivot_wider(names_from = diagnosis, values_from = mean_diag)
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#create heatmap
w <- screen_mean_diag[, names(screen_mean_diag) != "drug"]
rownames(w) = screen_mean_diag$drug
## Warning: Setting row names on a tibble is deprecated.
w <- as.matrix(t(w))

set.seed(123)
ha = rowAnnotation(foo = anno_text(rownames(w), gp = gpar(fontsize=8), just="right", offset=1))
## Warning: `offset` is deprecated, use `location` instead.
column_ha = HeatmapAnnotation(foo = anno_text(colnames(w), gp = gpar(fontsize=8), just = "left", offset =0))
## Warning: `offset` is deprecated, use `location` instead.
h2 <- Heatmap(w, name = "Mean viability", left_annotation = ha, 
top_annotation = column_ha, 
show_row_names = FALSE, show_column_names=FALSE, width = ncol(z)*unit(0.45, "mm"), height = nrow(z)*unit(0.4, "mm"), show_row_dend=TRUE, row_dend_side = "left", show_column_dend=FALSE,
row_dend_width = unit(7, "mm"), heatmap_legend_param = list(title_gp = gpar(fontsize = 8, fontface = "bold"), labels_gp = gpar(fontsize = 8)))

Fig3c_top <- draw(h2, heatmap_legend_side = "right", padding = unit(c(2, 2, 0, 2), "mm"))

#extract heatmap order
clustered_col_indices <- column_order(h2)
## Warning: The heatmap has not been initialized. You might have different results
## if you repeatedly execute this function, e.g. when row_km/column_km was
## set. It is more suggested to do as `ht = draw(ht); column_order(ht)`.
clustered_col_names <- colnames(w)[clustered_col_indices]

#create patient-specific means
screen_mean_pat <- screen_long |> 
  filter(diagnosis %in% diagnoses_show) |> 
  group_by(drug, patientID) |> 
  summarize(mean_pat = mean(viability)) |> 
  merge(drugs[,c("Drug", "Category")], by.x="drug", by.y="Drug") |> 
  mutate(drug = factor(drug, levels = clustered_col_names)) |> 
  arrange(drug)
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
#add boxplots  
Fig3c_bottom <- ggplot(screen_mean_pat, aes(x=drug, y=mean_pat, color=Category))+ 
  geom_boxplot(alpha=0.6, outlier.shape=NA) +
  geom_jitter(alpha=0.1, size=0.2, width = 0.15)+
  theme_figures+
  theme(axis.text.x = element_text(color="black", size=8, angle=90, hjust=1, vjust=0.5), 
        legend.position = "bottom",
        panel.spacing = unit(0, "mm"),
        plot.margin = margin(t=0, b=2, r=33, l=21, unit = "mm"))+
  scale_y_continuous(name ="Mean viability", breaks=seq(0,1.2, 0.2), limits=c(0,1.2)) +
  scale_x_discrete(name="")+
  scale_color_manual(values=colors_drug_type, name =  "Drug type", labels=c("Chemotherapy", "Kinase inhibitor", "Other"))
Fig3c_bottom
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Fig3c_bottom_aligned <- ggdraw(Fig3c_bottom) +
  theme(plot.margin = margin(0, 0, 0, 0))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Fig3c <- plot_grid(grid.grabExpr(draw(h2, heatmap_legend_side = "right")), Fig3c_bottom_aligned, ncol=1, rel_heights = c(0.5, 0.5))

#save?

5.3.4 Disease-specific drug effects

5.3.4.1 Overview (Fig. S3 and S4)

#create plots
aml <- create_boxplot(screen_means_ighv, "AML", colors_diag_IGHV, "AML")
ball <- create_boxplot(screen_means_ighv, "B-ALL", colors_diag_IGHV, "B-ALL")
tall <- create_boxplot(screen_means_ighv, "T-ALL", colors_diag_IGHV, "T-ALL")

SFig3 <- plot_grid(aml, ball, tall, NULL, NULL, ncol = 5, nrow = 1)
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_point()`).
SFig3

ggsave(path=(paste0(filepath,"figures")), filename = "SFig3_viab_leucemia.svg", width=42, height=20, units = "cm")


bpll <- create_boxplot(screen_means_ighv, "B-PLL", colors_diag_IGHV, "B-PLL")
mcll <- create_boxplot(screen_means_ighv, "M-CLL", colors_diag_IGHV, "M-CLL")
mcl <- create_boxplot(screen_means_ighv, "MCL", colors_diag_IGHV, "MCL")
ucll <- create_boxplot(screen_means_ighv, "U-CLL", colors_diag_IGHV, "U-CLL")
tpll <- create_boxplot(screen_means_ighv, "T-PLL", colors_diag_IGHV, "T-PLL")

SFig4 <- plot_grid(bpll, mcll, mcl, ucll, tpll, ncol = 5, nrow = 1)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 51 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
SFig4

ggsave(path=(paste0(filepath,"figures")), filename = "SFig4_viab_lymphoma.svg", width=42, height=20, units = "cm")

5.3.4.2 Heatmap (Fig. 3D)

#prepare data
diagnoses_show_ighv <- c(diagnoses_show, "U-CLL", "M-CLL")
diagnoses_show_ighv <- diagnoses_show_ighv[diagnoses_show_ighv != "CLL"]

viabTab.mean <- screen_means_ighv |> 
  filter(diagnosis %in% diagnoses_show_ighv)

#create p-values
allDiag <- unique(viabTab.mean$diagnosis)

pTabAll <- lapply(allDiag, function(diagBase) {
  #calculate p value matrix
  pTab <- lapply(allDiag[allDiag != diagBase], function(diagCompare) {
    testTab <- filter(viabTab.mean, diagnosis %in% c(diagBase, diagCompare))  |> 
      mutate(diagnosis = factor(diagnosis, levels = c(diagBase,diagCompare))) 
    res <- group_by(testTab, drug) |> do(tTest(.$mean_viab, .$diagnosis)) |>
      mutate(diagCompare = diagCompare, diagBase = diagBase)
    res
  }) |> bind_rows() |> ungroup() |> mutate(p.adj = p.adjust(p, method = "BH"))
}) |> bind_rows()

#prepare data table for plot 
plotTab <- pTabAll |> mutate(p = ifelse(p < 1e-12, 1e-12, p)) |>
  mutate(pSign = -log10(p)*sign(diff)) |>
  mutate(pSign = ifelse(p.adj < 0.1, pSign, 0)) |>
  ungroup()

#order drugs by their association similarity (hierarchical clustering)
pMat <- mutate(plotTab, diagPair = paste0(diagBase,"_",diagCompare)) |>
  dplyr::select(drug, pSign, diagPair) |>
  spread(key = diagPair, value = pSign) |> data.frame() |>
  column_to_rownames("drug") |> as.matrix()

#cut
set.seed(123)
nClust = 9
hcRes <- hclust(dist(pMat), method = "ward.D2")
drugOrder <- rownames(pMat)[hcRes$order]
clustMap <- cutree(hcRes,nClust)

diagOrder <- c("U-CLL", "M-CLL","MCL", "B-PLL", "T-PLL", "B-ALL", "T-ALL","AML")
plotTab <- mutate(plotTab, drug = factor(drug, levels = drugOrder), 
                  diagBase = factor(diagBase, levels= diagOrder), 
                  diagCompare = factor(diagCompare, levels = diagOrder)) |>
  arrange(drug) |> mutate(drugClust = paste0("",nClust-clustMap[as.character(drug)]+1))

#color strips in y-direction
strip <- strip_themed(background_y = elem_list_rect(fill = "white"))

#add pathway annotation
plotTab <- merge(plotTab, drugs[,c("Drug", "Pathway_mod")], by.x="drug", by.y="Drug")

#plot
main_plot <- ggplot(plotTab, aes(x=diagCompare, y = drug, fill = pSign)) + 
  geom_tile(size = 0.3, color = "white") +
  facet_grid2(rows = vars(drugClust), cols = vars(diagBase), scales = "free", space = "free_y", strip = strip) + 
  scale_fill_gradient2(
    high = "blue", mid = "white", low = "red", midpoint = 0)+
  theme_figures_facet_angle60_x+
  labs(fill = "**-log<sub>10</sub>*P*<br>with direction**") +
  theme(legend.title = ggtext::element_markdown())+
  ylab("") + xlab("")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
main_plot

#add pathway annotation
anno_plot <- ggplot(plotTab, aes(x = diagCompare, y = drug, fill = Pathway_mod)) +
  geom_tile(size = 0.3) +
  facet_grid(rows = vars(drugClust), scales = "free", space = "free_y") +
  scale_fill_manual(values = colors_pathways_mod,
                    name = "Drug Class")+
  theme_minimal() +
  theme(axis.text = element_blank(),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_rect(linewidth = 0.75),
        panel.background = element_blank(),
        plot.background = element_blank(),
        strip.text.y = element_blank(),
        panel.spacing = unit(0.15, "lines"), 
        legend.position = "none")

Fig3d <- ggdraw() + 
  draw_plot(main_plot, x=0,y=0,width=1,height=1)+
  draw_plot(anno_plot,x=0.8612,y=0.075,width=0.03,height=0.898) & 
  theme(plot.margin = margin(t=1, unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3d_pvalue_heatmap.svg", width=42, height=30, units = "cm")

5.3.4.3 Drug ranking

#prepare data
#rank drugs
rankTab <- screen_long |> 
  group_by(patientID, drug, diagnosis) |> 
  summarise(meanViab = mean(viability)) |> 
  dplyr::ungroup()
## `summarise()` has grouped output by 'patientID', 'drug'. You can override using
## the `.groups` argument.
medTab <- screen_long |> 
  group_by(drug) |> 
  summarise(medVal = median(viability))
rankTab <- left_join(rankTab, medTab, by = "drug") |>  
  mutate(score = meanViab - medVal)

#make rank plot
rank_score_tab <- rankTab |> 
  group_by(patientID, diagnosis) |> 
  arrange((score)) |> 
  mutate(rank_score=row_number(), l2fc = log2(meanViab)-log2(medVal)) |> 
  ungroup()

#show ranks for AML with venetoclax and cytarabine
aml_cyt <- rank_score_tab |> 
filter(diagnosis %in% c("AML")) |> 
mutate(drug_col = ifelse(drug %in% c("Cytarabine"), drug, "other")) |>
ggplot(aes(x=rank_score, y=score, alpha=drug_col, group=patientID))+
geom_hline(yintercept = 0, color="grey70")+
geom_line(alpha=0.25, color="black")+
geom_point(aes(alpha=drug_col, fill=drug_col, color=drug_col), size=1.5, shape=21)+
scale_alpha_manual(values=c(0.75,0))+
scale_fill_manual(name="Drug", breaks=c("Cytarabine"), values=c("#E31A1C", ""))+
scale_color_manual(name="Drug", breaks=c("Cytarabine"), values=c("#E31A1C", ""))+
  labs(title="Cytarabine", y="Score", x="Drug rank")+
  theme_bw()+
  theme(axis.text = element_text(color="black", size=8),
  legend.position = "none", axis.title = element_text(size=8), plot.title = element_text(hjust=0.5, size=8),
  panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 1, fill = NA))

aml_ven <- rank_score_tab |> 
filter(diagnosis %in% c("AML")) |> 
mutate(drug_col = ifelse(drug %in% c("Venetoclax"), drug, "other")) |>
ggplot(aes(x=rank_score, y=score, alpha=drug_col, group=patientID))+
geom_hline(yintercept = 0, color="grey70")+
geom_line(alpha=0.25, color="black")+
geom_point(aes(alpha=drug_col, fill=drug_col, color=drug_col), size=1.5, shape=21)+
scale_alpha_manual(values=c(0, 0.75))+
scale_fill_manual(name="Drug", breaks=c("Venetoclax"), values=c("#E31A1C", ""))+
scale_color_manual(name="Drug", breaks=c("Venetoclax"), values=c("#E31A1C", ""))+
  labs(title="Venetoclax", y="Score", x="Drug rank")+
  theme_bw()+
  theme(axis.text = element_text(color="black", size=8),
  legend.position = "none", axis.title = element_text(size=8), plot.title = element_text(hjust=0.5, size=8),
  panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 1, fill = NA))

SFig8c <- aml_cyt + aml_ven + plot_annotation(title = "AML", theme = theme(plot.title = element_text(hjust=0.5, size=8, face="bold"))) + theme(plot.margin = margin(t=1, unit = "cm"))
SFig8c

#combine with IGHV annotation
cll_meta <- metadata[, c("Patient.ID", "diagnosis", "IGHV.status")] |> 
  filter(diagnosis == "CLL")

rank_score_tab_mod <- merge(rank_score_tab, cll_meta[,c("Patient.ID", "IGHV.status")], by.x="patientID", by.y="Patient.ID", all.x = TRUE) |> 
  mutate(diagnosis = case_when(IGHV.status == "U" ~ "U-CLL",
                               IGHV.status == "M" ~ "M-CLL",
                               TRUE ~ diagnosis)) |> 
                               filter(diagnosis != "CLL")

drug_ranking_diag <- rank_score_tab_mod |> 
  group_by(drug, diagnosis) |> 
  summarize(median_score = median(score), score_sd = sd(score)) |> 
  arrange(median_score) |> 
  group_by(diagnosis) |>
  mutate(rank_median = row_number())
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
# plot: aml
drug_order_aml <- drug_ranking_diag |> 
  filter(diagnosis == "AML") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_aml)  

rank_aml <- drug_ranking_diag |> 
  filter(diagnosis == "AML") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
rank_aml

# plot: u-cll
drug_order_ucll <- drug_ranking_diag |> 
  filter(diagnosis == "U-CLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_ucll)  

rank_ucll <- drug_ranking_diag |> 
  filter(diagnosis == "U-CLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag_IGHV)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))    
          
# plot: m-cll
drug_order_mcll <- drug_ranking_diag |> 
  filter(diagnosis == "M-CLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_mcll)  

rank_mcll <- drug_ranking_diag |> 
  filter(diagnosis == "M-CLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag_IGHV)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))            
          
# plot: b-all
drug_order_ball <- drug_ranking_diag |> 
  filter(diagnosis == "B-ALL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_ball)  

rank_ball <- drug_ranking_diag |> 
  filter(diagnosis == "B-ALL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))      
          
# plot: t-all
drug_order_tall <- drug_ranking_diag |> 
  filter(diagnosis == "T-ALL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_tall)  

rank_tall <- drug_ranking_diag |> 
  filter(diagnosis == "T-ALL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))            

# plot: mcl
drug_order_mcl <- drug_ranking_diag |> 
  filter(diagnosis == "MCL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_mcl)  

rank_mcl <- drug_ranking_diag |> 
  filter(diagnosis == "MCL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))  

# plot: tpll
drug_order_tpll <- drug_ranking_diag |> 
  filter(diagnosis == "T-PLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_tpll) 

rank_tpll <- drug_ranking_diag |> 
  filter(diagnosis == "T-PLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))  
          

# plot: bpll
drug_order_bpll <- drug_ranking_diag |> 
  filter(diagnosis == "B-PLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_bpll) 

rank_bpll <- drug_ranking_diag |> 
  filter(diagnosis == "B-PLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))          

SFig8b <- (rank_aml+plot_spacer()+rank_ball+plot_layout(widths = c(1, -0.1 ,1)))/(rank_bpll+plot_spacer()+rank_mcll+plot_layout(widths = c(1, -0.1 ,1)))/(rank_mcl+plot_spacer()+rank_ucll+plot_layout(widths = c(1, -0.1 ,1)))/(rank_tall+plot_spacer()+rank_tpll+plot_layout(widths = c(1, -0.1 ,1))) + theme(plot.margin = margin(t=1, unit = "cm"))

#calculate global ranks
#show all diseases
global_rank_tab <- rank_score_tab_mod |> 
group_by(patientID) |> 
mutate(global_score = sum(score)) |> 
arrange(global_score) |> 
group_by(drug) |> 
mutate(global_rank = row_number()) |> 
filter(drug == "Ibrutinib") |> 
#calculate median global rank per diagnosis
group_by(diagnosis) |> 
mutate(global_score_median_diag = median(global_score),
        global_rank_median_diag = median(global_rank))
        

#plot
diagSelected <- c("AML", "B-ALL", "T-ALL", "T-PLL", "B-PLL", "MCL", "U-CLL", "M-CLL")

global_rank_tab <- global_rank_tab |> 
  mutate(diag_col = ifelse(diagnosis %in% diagSelected, diagnosis, "other"))
  
global_rank_tab$diag_col <- factor(global_rank_tab$diag_col, levels = sort(diagSelected))

# Calculate the line for all data
reference_line <- global_rank_tab |>
  arrange(global_rank) |>
  dplyr::select(global_rank, global_score)
## Adding missing grouping variables: `diagnosis`
global_rank_tab <- global_rank_tab |> 
  filter(!is.na(diag_col)) 

score_ranking <- global_rank_tab |> 
  arrange(global_score_median_diag) |> 
  distinct(diag_col) |> 
  pull(diag_col)
  
global_rank_tab$diag_col <- factor(global_rank_tab$diag_col, levels = score_ranking)

# Now plot with facets
all_diag <- global_rank_tab |> 
  ggplot(aes(x=global_rank, y=global_score)) +
  geom_hline(data = global_rank_tab |> 
               distinct(diag_col, global_score_median_diag),
             aes(yintercept = global_score_median_diag), 
             color="grey90")+
  geom_line(data = reference_line, color="grey30", linewidth=0.5, inherit.aes=TRUE) +
  geom_hline(data = global_rank_tab |> 
               distinct(diag_col, global_score_median_diag),
             aes(yintercept = global_score_median_diag), 
             color="grey80")+
  geom_point(aes(color=diag_col), alpha=0.8, size=1.5) +
  #geom_point(aes(x=global_rank_median_diag, y=global_score_median_diag), color="grey80",size=1.5, shape=21) +
  facet_wrap(~diag_col, ncol=4) +
  scale_alpha_manual(values=c(rep(0.5, length(diagSelected)))) +
  scale_color_manual(values=colors_diag_IGHV)+
  labs(title="", y="Sum of drug scores", x="Sample rank") +
  theme_bw() +
  guides(alpha = "none", color="none")+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 0, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))

SFig8a <- all_diag + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))

#multipanel
row3 <- plot_grid(SFig8c, NULL, ncol = 2, rel_widths = c(0.4, 0.6))

SFig.5 <- plot_grid(SFig8a, SFig8b, row3, rel_heights = c(0.2, 0.65, 0.15), ncol = 1, 
                   align = "v", axis = c("l", "r"), 
                   labels=c("A", "B", "C"), label_size=14) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))


ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.png", width=42, height=59.4, units = "cm", dpi=600) #remove later

5.3.4.4 Signature drugs at the single concentration level (multivariate binary logistic regression)

5.3.4.5 Specific drugs

#prepare data
drugs_show <- c("Ibrutinib", "PF-3758309", "AZD8055", "Venetoclax", "Duvelisib") 
drug_targets <- c("Ibrutinib" = "BTK", "PF-3758309" = "PAK", "AZD8055" = "mTOR", "Venetoclax" = "BCL2", "Duvelisib" = "PI3K")

screen_long_selected_drugs <- screen_long |> 
  filter(drug %in% drugs_show) |> 
  group_by(drug, diagnosis, conc_index) |>
    mutate(mean_viab=mean(viability), 
    std.error=std.error(viability))

screen_long_selected_drugs$concentration <- as.character(screen_long_selected_drugs$concentration)

#show drug-response for BTK, PAK and mTOR inhibitor
#helper function for plots
viab_plot_list <- list()

viab_plot_list <- map(drugs_show, ~{
  screen_long_selected_drugs |> 
    filter(drug == .x, diagnosis %in% diagnoses_show) |> 
    ggplot(aes(x=concentration, y=mean_viab, group=diagnosis, color=diagnosis)) +
      geom_errorbar(aes(ymin=mean_viab-std.error, ymax=mean_viab+std.error), 
                    width=.5, position=position_dodge(0.01)) +
      geom_line() +
      theme_figures_angle45_x +
      labs(title=paste0(.x, " (", drug_targets[.x], ")"), 
           y="Viability", x=bquote("Concentration ("*mu*"M)"), 
           color="Diagnosis") +
      scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1)) +
      scale_color_manual(values=colors_diag)
}) |> 
  set_names(drugs_show)

#combine plots
Fig3e <- viab_plot_list$Ibrutinib + viab_plot_list$`PF-3758309` + viab_plot_list$AZD8055 + 
  plot_layout(ncol=3, guides = 'collect') & 
  theme(legend.position='right', legend.title= element_text(size=8, face="bold"), 
        plot.margin = margin(t=0.5, b=0.5, l=0.5, r=0.5, unit = "cm")) &
  guides(
    color = guide_legend(ncol = 1),
    fill = guide_legend(ncol = 1),
    shape = guide_legend(ncol = 1),
    linetype = guide_legend(ncol = 1)
  )
Fig3e

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3e_viab_btk_pak_mtor.svg")
## Saving 12 x 6 in image
#show drug-response for BCL2 inhibtor (venetoclax)
diagnoses_ven <- c("CLL", "AML", "B-ALL", "T-ALL", "MCL", "Sezary")

screen_long_ven <- screen_long_selected_drugs |> 
  filter(drug == "Venetoclax", diagnosis %in% diagnoses_ven) |> 
  group_by(diagnosis, concentration) |> 
  summarize(med_viab=median(viability))
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#fit dose-response models
screen_long_ven$concentration <- as.numeric(screen_long_ven$concentration)

models_ven <- map(diagnoses_ven, function(dx) {
  data <- screen_long_ven |> filter(diagnosis == dx)
  drm(med_viab ~ concentration, 
      data = data, 
      fct = LL.4(names = c("hill", "min_value", "max_value", "ec_50")))
})
names(models_ven) <- tolower(diagnoses_ven)

#create predictions based on models
predictions_ven <- map(diagnoses_ven, function(dx) {
  data <- screen_long_ven |> filter(diagnosis == dx)
  model <- models_ven[[tolower(dx)]]
  
  new_data <- data.frame(
    concentration = seq(min(data$concentration), max(data$concentration), length.out = 500)
  )
  new_data$pred <- predict(model, newdata = new_data)
  new_data$diagnosis <- dx
  new_data
}) |> 
  bind_rows()

#calculate IC50 values for all diagnoses
ic50_ven <- do.call(rbind, lapply(diagnoses_ven, function(dx) {
  model <- models_ven[[tolower(dx)]]
  coefs <- setNames(model$coefficients, c("hill", "min_value", "max_value", "ec_50"))
  
  ic_50 <- with(as.list(coefs), 
                exp(log(ec_50) + (1 / hill) * log(max_value / (max_value - 2 * min_value))))
  
  data.frame(ic50 = ic_50, diagnosis = dx)
}))
## Warning in log(max_value/(max_value - 2 * min_value)): NaNs produced
#set colors
colors_ven <- c(setNames("darkgrey", "Sezary"), colors_diag)

#order of diagnoses
order_ven <- ic50_ven |> 
  arrange(ic50) |> 
  pull(diagnosis)

#plot
data_ven <- screen_long_selected_drugs |> 
  filter(drug == "Venetoclax", diagnosis %in% diagnoses_ven)
data_ven$diagnosis <- factor(data_ven$diagnosis, levels = order_ven)
data_ven$concentration <- as.numeric(data_ven$concentration)

predictions_ven$diagnosis <- factor(predictions_ven$diagnosis, levels = order_ven)
ic50_ven$diagnosis        <- factor(ic50_ven$diagnosis, levels = order_ven)

Fig3f <- data_ven |> 
  ggplot(aes(x=concentration, y=viability, group=diagnosis, color=diagnosis)) +
    geom_boxplot(aes(group=concentration), alpha = 0.8, outlier.shape = NA) +
    geom_jitter(aes(group=concentration), alpha=0.2, size=1, width = 0.15) +
    facet_wrap(~diagnosis, ncol=6) +
    geom_line(data = predictions_ven, aes(x = concentration, y = pred, color = diagnosis))+
    geom_vline(data = ic50_ven, aes(xintercept = ic50), 
               linetype = "dashed", color = "black", size = 0.4) +
    scale_x_log10(breaks = c(0.001, 0.01, 0.1, 1),
                  labels = trans_format("log10", math_format(10^.x)))+
    scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1)) +
    theme_figures_facet+
    labs(title="Venetoclax (BCL2)", x=bquote("Concentration ("*mu*"M)"), y = "Viability") +
    scale_color_manual(values=colors_ven)+
    theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig3f
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3f_venetoclax.svg")
## Saving 12 x 6 in image
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
#duvelisib
Fig3g <- screen_long_selected_drugs |> 
  filter(diagnosis %in% c("B-ALL", "T-ALL"), drug == "Duvelisib") |> 
  ggplot(aes(x=factor(concentration), y=viability, group=interaction(diagnosis, concentration), color=diagnosis)) +
  geom_boxplot(alpha = 0.8, outlier.shape = NA)+
  geom_jitter(alpha = 0.2, size=1, position = position_jitterdodge())+
  labs(title="Duvelisib (PI3K)", y="Viability", x=bquote("Concentration ("*mu*"M)"), color = "Diagnosis")+
  scale_y_continuous(breaks=seq(0,1.2, 0.2), limits=c(0,1.2))+
  theme_figures_angle45_x+
  scale_color_manual(values=colors_diag)+
  theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig3g 

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3g_duvelisib.svg")
## Saving 12 x 6 in image
#cobimetinib (...)

#quizartinib (...)
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 22 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).

6 Drug-drug correlations

6.1 Drug-drug correlation heatmap

## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "cl.col" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "cl.col" is not a graphical parameter
## Warning in title(title, ...): "cl.col" is not a graphical parameter

## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "cl.col" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "cl.col" is not a graphical parameter
## Warning in title(title, ...): "cl.col" is not a graphical parameter

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning in as_grob.default(plot): Cannot convert object of class NULL into a
## grob.

6.2 Scatter plots for drug-drug correlations (Fig. 4B)

#select drugs
drugs_show_corr <- c("SCH772984", "Cobimetinib", "Ibrutinib", "Idelalisib", "Cobimetinib", "Rapamycin")

#helper function for CLL
corr_drugs_cll <- screen_means |> 
  filter(diagnosis == c("CLL"), drug %in% drugs_show_corr) |> 
  pivot_wider(names_from = drug, values_from = mean_viab)

corr_drugs_aml <- screen_means |> 
  filter(diagnosis == c("AML"), drug %in% drugs_show_corr) |> 
  pivot_wider(names_from = drug, values_from = mean_viab)

corr_drugs <- rbind(corr_drugs_cll, corr_drugs_aml)

plot_corr <- function(data, x, y, xlab, ylab) {
  
  corr_labels <- data |> 
    group_by(diagnosis) |> 
    summarize(corr=cor(.data[[x]], .data[[y]], use = "complete.obs"),
              .groups = "drop")
  
  ggplot(data, aes_string(x = x, y = y)) +
    geom_point(aes(color = diagnosis), alpha = 0.5) +
    geom_smooth(method = "lm", se = TRUE,
                color = "black", fill = "lightgrey") + 
    labs(x = xlab, y = ylab, color="Diagnosis")+
    facet_wrap(~diagnosis)+
    theme_figures + 
    theme(strip.background = element_blank(),
          strip.text.x = element_blank())+
    geom_text(data = corr_labels, aes(x = 0.8, y = 1.1, 
                                      label = paste0("italic(R) == ", round(corr, 2))), size=3, parse = TRUE) +
    xlim(0.4,1.2) + ylim(0.4,1.2) +
    scale_color_manual(values = colors_diag)
}

#plot
p1 <- plot_corr(corr_drugs, "SCH772984", "Cobimetinib", xlab="SCH772984 (ERK)", ylab= "Cobimetinib (MEK)")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p2 <- plot_corr(corr_drugs, "Ibrutinib", "Idelalisib", xlab="Ibrutinib (BTK)", ylab= "Idelalisib (PI3K)")
p3 <- plot_corr(corr_drugs, "Ibrutinib", "Cobimetinib", xlab="Ibrutinib (BTK)", ylab= "Cobimetinib (MEK)")
p4 <- plot_corr(corr_drugs, "Rapamycin", "Idelalisib", xlab="Rapamycin (mTOR)", ylab= "Idelalisib (PI3K)")

Fig4b <- p1+plot_spacer()+p2+plot_spacer()+p3+plot_spacer()+p4+plot_layout(ncol=7, widths = c(4, 0.5, 4, 0.5, 4, 0.5, 4), guides = 'collect') & theme(legend.position='right', plot.margin = margin(t=1, l=0.5, unit = "cm"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

7 Genotype-phenotype associations in CLL

7.1 P value scatterplot

#prepare data
#pTab <- ... (adapt from Junyans script)

pTab <- read.csv(paste0(filepath,"data/pTable_drug_VS_gene.csv"), sep=";") |> 
  mutate(across(c(p, p.adj, diff), ~as.numeric(gsub(",", ".", as.character(.)))))

#number of significant associations per gene (10% FDR)
plotTab1 <- filter(pTab, p.adj <= 0.1) %>% group_by(gene) %>%
  summarise(number = length(drug)) %>%
  arrange(desc(number)) %>% mutate(gene = factor(gene, levels = gene))

#p value scatter plot (IGHV not shown)
top_genes <- as.vector(unlist(plotTab1[2:8,"gene"]))
showGenes <- sort(top_genes)
append(showGenes, "other")
## [1] "DDX3X"     "del11q"    "del13q"    "del17p"    "gain19p"   "TP53"     
## [7] "trisomy12" "other"
#define colors for each gene
library(RColorBrewer)
colorList <- c(colorRampPalette(brewer.pal(7, 'Set2'))(7), 
               "grey80", "grey90")
  
names(colorList) = c(showGenes, "other", "Below 10% FDR")

colorList["del17p"] <- "#FC8D62" 
colorList["del11q"] <- "#E5C494"
colorList["trisomy12"] <- "#FFD92F"
colorList["DDX3X"] <- "#E78AC3"
colorList["TP53"] <- "#66C2A5"

#define shapes for viability
shapeList <- ifelse(names(colorList) %in% showGenes, 19,1)
names(shapeList) <- names(colorList)
shapeList["other"] <- 19

shapeList <- c(1, 25, 24)
names(shapeList) = c("Below 10% FDR", "Down", "Up")

#order drugs by their association similarity
pMat <- dplyr::select(pTab, drug, gene, p) %>% 
  filter(gene!="IGHV.status") %>%
  spread(key = gene, value = p) %>%
  column_to_rownames("drug") %>% 
  as.matrix()

hc <- hclust(dist(pMat), method = "ward.D2")
drugOrder <- rownames(pMat)[hc$order]

#order drugs by p value
pval_list <- pTab |> 
  filter(gene != "IGHV.status") |> 
  group_by(drug) |> 
  summarize(pval = min(p)) |> 
  arrange(pval, descending = FALSE)

drugOrder2 <- pval_list$drug

#prepare plot table
plotTab <- pTab %>% filter(gene != "IGHV.status") %>%
  mutate(gene = ifelse(gene %in% showGenes, gene, "other")) %>%
  mutate(gene_FDR = ifelse(p.adj < 0.1, gene, "Below 10% FDR"),
         drug = factor(drug, levels = drugOrder2),
         gene = factor(gene, levels = names(colorList)),
         direction = ifelse(gene_FDR == "Below 10% FDR", "Below 10% FDR", ifelse(sign(diff)>0, "Down", "Up"))) 

#plot
Fig5a <- ggplot(plotTab, aes(x=drug, y = -log10(p), color = gene, shape = direction)) +
  geom_point(size =2, alpha = 0.8, stroke = 1) + scale_color_manual(values = colorList) +
  scale_shape_manual(values = shapeList, name = "Viability effect") + theme_bw() +
  labs(x="", y = expression(-log[10]*italic(P)), color = "Genetic aberration")+
  theme_figures_border+
  theme(axis.text.x = element_text(angle=45, hjust=1, vjust=1),
        panel.grid.major.x = element_line(size=.1, color="grey50", 
                                          linetype = "dotted"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        legend.position = c(.95, .95),
        legend.justification = c("right", "top"),
        legend.box.just = "right",
        legend.box = "horizontal", 
        legend.margin = margin(6, 6, 6, 6))+
  geom_hline(yintercept=-log10(0.004706046), linetype = "dashed", linewidth=0.3, color="black")+
  annotate("text", label="FDR 10%", size=3, x=60, y=3)+
  guides(shape = guide_legend(order = 1),
         color = guide_legend(order = 2))

Fig5a <- Fig5a + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
Fig5a

ggsave(path=(paste0(filepath,"figures")), filename = "Fig5a_pvalue_scatterplot.svg")
## Saving 18 x 9 in image

7.2 Volcano plots (for IGHV, trisomy12 and TP53)

#IGHV
#perform t-tests for each drug and concentration level
p_values <- screen_long |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(IGHV.status))) |> 
  group_by(drug, conc_index) |> 
  summarize(tTest(viability, IGHV.status), .groups = "drop") |> 
  rename("meanU" = "mean1", "meanM" = "mean2") |> 
  ungroup() |> 
  mutate(p.adj = p.adjust(p, method = "BH"), 
         neg_log10_p = -log10(as.vector(p)))

#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall <- screen_means |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(IGHV.status))) |> 
  group_by(drug, IGHV.status) |> 
  summarize(overall_mean = mean(mean_viab), .groups = "drop") |>  
  pivot_wider(names_from = "IGHV.status", values_from = "overall_mean") |> 
  mutate(overall_diff_viab = (U-M)/(U)*100)

#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall <- p_values |> 
  mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |> 
  group_by(drug) |> 
  summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |> 
  left_join(diffviab_overall, by="drug") |> 
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug") 

#define order
order_pway2 <- sort(unique(diffviab_p_values_overall$Pathway_mod2))
order_pway2 <- c(order_pway2[order_pway2 != "other"], "other")
diffviab_p_values_overall$Pathway_mod2 <- factor(diffviab_p_values_overall$Pathway_mod2, levels = order_pway2)

#plot
set.seed(123)
volcano_ighv <- ggplot(diffviab_p_values_overall, aes(x = overall_diff_viab, y = logpval, label=drug)) +
  geom_hline(yintercept = -log10(0.05), 
             linetype = "dashed", 
             color = "black") +
  geom_vline(xintercept = 0, 
             color = "black") +
  geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
  geom_text_repel(
    text.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = 10,
    min.segment.length = 2,
    size=3)+
  theme_figures+
  labs(
    title = "IGHV",
    x = "Difference in mean viability (%)",
    y = expression(-log[10]*italic(P)),
    color="Pathway", size="Number of sign. \ndifferent concentrations") +
  scale_x_continuous(limits = c(-20, 20)) +
  scale_color_manual(values=colors_pathways_mod2) +
  guides(color = guide_legend(override.aes = list(size = 4)))+
  annotate("text", label="more active\nin U-CLL", y=20, x=-10, size=3)+
  annotate("text", label="more active\nin M-CLL", y=20, x=10, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
Fig5b_top <- volcano_ighv + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig5b_top
## Warning: ggrepel: 49 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#trisomy12
#perform t-tests for each drug and concentration level
p_values_tri12 <- screen_long |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(trisomy12))) |> 
  group_by(drug, conc_index) |> 
  summarize(tTest(viability, trisomy12), .groups = "drop") |> 
  rename("mean_mut" = "mean1", "mean_unmut" = "mean2") |> 
  ungroup() |> 
  mutate(p.adj = p.adjust(p, method = "BH"), 
         neg_log10_p = -log10(as.vector(p)))

#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall_tri12 <- screen_means |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(trisomy12))) |> 
  group_by(drug, trisomy12) |> 
  summarize(overall_mean = mean(mean_viab), .groups = "drop") |>  
  pivot_wider(names_from = "trisomy12", values_from = "overall_mean") |> 
  rename("mut" = "1", "unmut" = "0") |> 
  mutate(overall_diff_viab = (mut-unmut)/(mut)*100)

#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall_tri12 <- p_values_tri12 |> 
  mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |> 
  group_by(drug) |> 
  summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |> 
  left_join(diffviab_overall_tri12, by="drug") |> 
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug") 

#plot
diffviab_p_values_overall_tri12$Pathway_mod2 <- factor(diffviab_p_values_overall_tri12$Pathway_mod2, levels = order_pway2)

volcano_tri12 <- ggplot(diffviab_p_values_overall_tri12, aes(x = overall_diff_viab, y = logpval, label=drug)) +
  geom_hline(yintercept = -log10(0.05), 
             linetype = "dashed", 
             color = "black") +
  geom_vline(xintercept = 0, 
             color = "black") +
  geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
  geom_text_repel(
    text.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = 10,
    min.segment.length = 2,
    size=3)+
  theme_figures+
  labs(
    title = "trisomy12",
    x = "Difference in mean viability (%)",
    y = expression(-log[10]*italic(P)),
    color="Pathway", size="Number of sign. \ndifferent concentrations") +
  scale_x_continuous(limits = c(-20, 20)) +
  scale_color_manual(values=colors_pathways_mod2) +
  guides(color = guide_legend(override.aes = list(size = 4)))+
  annotate("text", label="more active\nin trisomy12", y=15, x=-10, size=3)+
  annotate("text", label="more active\nin wt", y=15, x=10, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
Fig5b_bottom <- volcano_tri12 + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig5b_bottom
## Warning: ggrepel: 53 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#TP53
#perform t-tests for each drug and concentration level
p_values_tp53 <- screen_long |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(TP53))) |> 
  group_by(drug, conc_index) |> 
  summarize(tTest(viability, TP53), .groups = "drop") |> 
  rename("mean_mut" = "mean1", "mean_unmut" = "mean2") |> 
  ungroup() |> 
  mutate(p.adj = p.adjust(p, method = "BH"), 
         neg_log10_p = -log10(as.vector(p)))

#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall_tp53 <- screen_means |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(TP53))) |> 
  group_by(drug, TP53) |> 
  summarize(overall_mean = mean(mean_viab), .groups = "drop") |>  
  pivot_wider(names_from = "TP53", values_from = "overall_mean") |> 
  rename("mut" = "1", "unmut" = "0") |> 
  mutate(overall_diff_viab = (mut-unmut)/(mut)*100)

#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall_tp53 <- p_values_tp53 |> 
  mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |> 
  group_by(drug) |> 
  summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |> 
  left_join(diffviab_overall_tp53, by="drug") |> 
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug") 

#plot
diffviab_p_values_overall_tp53$Pathway_mod2 <- factor(diffviab_p_values_overall_tp53$Pathway_mod2, levels = order_pway2)

volcano_tp53 <- ggplot(diffviab_p_values_overall_tp53, aes(x = overall_diff_viab, y = logpval, label=drug)) +
  geom_hline(yintercept = -log10(0.05), 
             linetype = "dashed", 
             color = "black") +
  geom_vline(xintercept = 0, 
             color = "black") +
  geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
  geom_text_repel(
    text.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = 10,
    min.segment.length = 2,
    size=3)+
  theme_figures+
  labs(
    title = "TP53",
    x = "Difference in mean viability (%)",
    y = expression(-log[10]*italic(P)),
    color="Pathway", size="Number of sign. \ndifferent concentrations") +
  scale_x_continuous(limits = c(-15, 15)) +
  scale_color_manual(values=colors_pathways_mod2) +
  guides(color = guide_legend(override.aes = list(size = 4)))+
  annotate("text", label="more active\nin mutated", y=8, x=-7.5, size=3)+
  annotate("text", label="more active\nin wt", y=8, x=7.5, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
FigS7f <- volcano_tp53 + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
FigS7f
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

7.3 Dose-response relationship in DDX3X-mutated U-CLL

#select B-cell receptor inhibitors, show response for U-CLL
drugs_bcr <- c("Ibrutinib", "ONO-4059", "PRT062607")

df_bcr <- screen_long |>
  filter(diagnosis == "CLL", drug %in% drugs_bcr) |> 
  merge(metadata[,c("Patient.ID", "IGHV.status", "DDX3X")], by.x = "patientID", by.y = "Patient.ID") |> 
  filter(IGHV.status == "U", !is.na(DDX3X))
  
#plot
bcr_ddx3x <- df_bcr |> 
  mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)",
    drug == "ONO-4059" ~ "ONO-4059 (BTK)",
    drug == "PRT062607" ~ "PRT062607 (SYK)",
    TRUE ~ drug)) |> 
  ggplot(aes(x=factor(concentration), y=viability, color=DDX3X)) + 
  geom_boxplot(alpha = 0.3, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(alpha = 0.25, position = position_jitterdodge()) + 
  facet_wrap(.~drug) + 
  theme_figures_facet_angle45_x_legend+
  stat_compare_means(aes(group = DDX3X), 
                    method = "t.test",
                    label = "p.signif",
                    label.y = 1.1, 
                    size=3)+
  labs(y = "Viability", x = bquote("Concentration ("*mu*"M)"), title ="U-CLL")+
  scale_color_manual(values=colors_ddx3x, labels=c("unmutated", "mutated")) +
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.15)) +
  guides(color = guide_legend(override.aes = list(label = "")))

Fig5c <- bcr_ddx3x + theme(plot.margin = margin(t=0.5, l=1, r=1, b=1, unit = "cm"))
Fig5c

7.3.1 BTK vs PI3K in DDX3X mutated CLL

#select ibrutinib and idelalisb, show response for U-CLL
drugs_ddx3x <- c("Ibrutinib", "Idelalisib")

btk_pi3k <- screen_long |>
  filter(diagnosis == "CLL", drug %in% drugs_ddx3x) |> 
  merge(metadata[,c("Patient.ID", "IGHV.status", "DDX3X")], by.x = "patientID", by.y = "Patient.ID") |> 
  filter(IGHV.status == "U", !is.na(DDX3X)) |> 
  mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)", drug == "Idelalisib" ~ "Idelalisib (PI3K)")) |> 
  pivot_wider(names_from = "drug", values_from = "viability")

#plot
btk_pi3k_ddx3x <- btk_pi3k |> 
    ggplot(aes(x=`Idelalisib (PI3K)`, y=`Ibrutinib (BTK)`, color=DDX3X)) + 
    geom_point(alpha = 0.75) + 
    geom_abline(slope=1, linetype="dashed")+
    scale_color_manual(values=colors_ddx3x, labels=c("unmutated", "mutated"))+
    theme_figures_facet+
    scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.3,1.1)) +
    scale_x_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.3,1.1)) +
    guides(color = guide_legend(override.aes = list(label = ""))) +
    facet_wrap(~concentration, ncol=5)+
    labs(title=expression(bold(paste("Concentration (", mu, "M)"))), x="Idelalisib (PI3K)", y= "Ibrutinib (BTK)")

SFig11b <- btk_pi3k_ddx3x + theme(plot.margin = margin(l = 1, r = 1, t=1, b=1.5, unit = "cm"))
SFig11b

7.4 Lasso model

## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning: There were 5 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `HIPO.ID = .Primitive("as.integer")(HIPO.ID)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

7.5 del11q

#select B-cell receptor inhibitors
del11q <- screen_means |>
  filter(diagnosis == "CLL", drug %in% drugs_bcr) |> 
  merge(metadata[,c("Patient.ID", "IGHV.status", "del11q")], by.x = "patientID", by.y = "Patient.ID") |> 
  filter(!is.na(IGHV.status), !is.na(del11q)) |> 
  group_by(patientID, drug) |> 
  mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)", drug == "ONO-4059" ~ "ONO-4059 (BTK)", drug == "PRT062607" ~ "PRT062607 (SYK)"))

del11q_ighv <- del11q |> 
  filter(drug == "Ibrutinib (BTK)") |> 
  ggplot(aes(x=del11q, y=mean_viab, color=del11q)) +
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(aes(shape=IGHV.status), width = 0.1, alpha = 0.75) + 
  scale_color_manual(values=colors_del11q, labels=c("unmutated", "mutated")) +
  scale_shape_discrete(name="IGHV", labels=c("unmutated", "mutated")) +
  facet_wrap(~drug)+
  theme_figures_facet+
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "right",
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))+
  labs(y = "Mean viability", x="", title="CLL") + 
  stat_compare_means(aes(group = del11q), method = "t.test", label = "p.signif", label.x.npc= "middle", label.y = 1.05, size=3)+
  scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.4,1.1)) +
  guides(color = guide_legend(override.aes = list(label = "")))

SFig10a <- del11q_ighv + theme(plot.margin = margin(l = 1, r = 1, t=1, unit = "cm"))
SFig10a

#show effect of B-cell receptor inhibitors in U-CLL only
del11q_ucll <- del11q |> 
  filter(IGHV.status == "U") |> 
  ggplot(aes(x=del11q, y=mean_viab, color=del11q)) +
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(aes(shape=IGHV.status), width = 0.1, alpha = 0.75) + 
  scale_color_manual(values=colors_del11q, labels=c("unmutated", "mutated")) +
  facet_wrap(~drug)+
  theme_figures_facet+
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "right",
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))+
  labs(y = "Mean viability", x="", title="U-CLL") + 
  stat_compare_means(aes(group = del11q), method = "t.test", label = "p.signif", label.x.npc= "middle", label.y = 1.05, size=3)+
  scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.4,1.1)) +
  guides(color = guide_legend(override.aes = list(label = "")), shape = "none")

SFig10b <- del11q_ucll + theme(plot.margin = margin(l = 1, r = 1, t=1, unit = "cm"))
SFig10b

7.6 trisomy(12) in M-CLL and U-CLL

7.7 Fludarabine and nutlin-3a in TP53-mutated CLL

8 DDX3X

8.1 Overview of mutations

#DDX3X mutations in CPS1000
ddx3x_table
## # A tibble: 8 × 8
##   ID    Sex   IGHV  `DNA mutation` `Amino acid alteration`  Exon
##   <chr> <chr> <chr> <chr>          <chr>                   <dbl>
## 1 P0293 M     U     c.845dupG      p.C282fs                    9
## 2 P0538 M     U     c.1115_1122del p.E372fs                   10
## 3 P0696 F     U     c.A991G        p.M331V                    10
## 4 P0711 M     U     c.C1229G       p.S410C                    12
## 5 P0880 F     U     c.C1184T       p.T395I                    11
## 6 P0987 M     U     c.C1120T       p.Q374X                    10
## 7 P1027 M     U     c.T1256G       p.L419R                    11
## 8 P1075 M     U     c.C625T        p.Q209X                     6
## # ℹ 2 more variables: `Type of alteration` <chr>, `Allele frequency` <dbl>
ddx3x_table$`Allele frequency` <- round(ddx3x_table$`Allele frequency`,2)

#create table
ft <- flextable(ddx3x_table) |> 
  theme_alafoli()
  
ft <-  ft |> 
  flextable::fontsize(size = 8, part = "header") |> 
  flextable::fontsize(size = 8, part = "body") |> 
  flextable::padding(padding.top = 3, padding.bottom = 3, part = "all") |> 
  flextable::align(align = "center", part = "all") |> 
  flextable::bold(part = "header") |>
  flextable::width(j=c(1,2,3,6,8), width = 0.1) |> 
  flextable::width(j=c(4,7), width = 0.25) |>
  flextable::width(j=c(5), width = 0.5) |>
  flextable::line_spacing(space = 1.15, part = "all") |>
  flextable::color(color = "black", part = "all") |> 
  #add header with Greek letter mu (μ)
  flextable::vline(j = "ID")
  
SFig11a <- gen_grob(ft, fit = "width", scaling = "min", hjust = 0)

8.1.1 Lollipop plot

#prepare data
mutations <- ddx3x_cps1000[,c("position","mutation", "type_adjusted")]
mutations$frequency <- c(rep(1,times=8))

ddx3x_literature <- ddx3x_literature |>  
  group_by(position) |>  
  mutate(count = n())

mutations_lit <- ddx3x_literature[,c("position","mutation", "type_adjusted")]
mutations_lit$frequency <- ddx3x_literature$count*(-1)

mutations <- rbind(mutations, mutations_lit)

mutations <- mutations |> 
  mutate(type_adjusted = case_when(
    type_adjusted == "multiple" ~ "Multiple",
    TRUE ~ type_adjusted
  ))

#combine mutations into one label
mutations_combined <- mutations %>%
  group_by(position, frequency) %>%
  summarise(
    mutations = paste(unique(mutation), collapse = ", "),
    .groups = "drop"
  )

#protein domain data (provide source)
domains <- data.frame(
  start = c(182, 414),
  end = c(404, 544),
  domain = c("Helicase domain 1", "Helicase domain 2")
)

RNA_domains <- data.frame(
  start = c(274, 302, 324, 445, 471, 495),
  end = c(280, 303, 342, 450, 480, 504),
  domain = c("RNA binding")
)

ATP_domains <- data.frame(
  start = c(343, 525),
  end = c(352, 536),
  domain = c("ATP binding")
)

color_data <- rbind(domains,ATP_domains,RNA_domains)

#protein length
protein_length <- 662

#set colors
mycolors_type <- colorRampPalette(brewer.pal(6, "Set2"))(6)
names(mycolors_type) <- unique(mutations$type_adjusted)

mycolors_domain <- colorRampPalette(brewer.pal(4, "Set3"))(4)
mycolors_domain<- alpha(mycolors_domain, 1)
names(mycolors_domain) <- c("Helicase domain 1", "Helicase domain 2", "ATP binding", "RNA binding")

#plot
lollipop <- ggplot() +
  #add mutation lollipops
  geom_segment(data = mutations,
               aes(x = position, xend = position,
                   y = 0, yend = frequency),
               color = "grey50", alpha = 0.5, size = 0.25) +
  
  geom_point(data = mutations,
             aes(x = position, y = frequency, color = type_adjusted), 
             size = 2) +
  
  #base rectangle representing the gene
  statebins:::geom_rrect(aes(xmin = 0, xmax = protein_length,
                ymin = -0.25, ymax = 0.25),
            fill = "white", color = "black", size=0.4) +
  
  #add protein domains as colored rectangles
  geom_rect(data = domains,
            aes(xmin = start, xmax = end,
                ymin = -0.25, ymax = 0.25, fill = domain),
            color = "white", size=0.25) +
 
  #add RNA binding domains
  geom_rect(data = RNA_domains,
            aes(xmin = start, xmax = end,
                ymin = -0.25, ymax = 0.25,
                fill = domain),
            color = "white", size=0.25) +  
            
  #add ATP binding domains
    geom_rect(data = ATP_domains,
            aes(xmin = start, xmax = end,
                ymin = -0.25, ymax = 0.25,
                fill = domain),
            color = "white", size=0.25) +
            
  #add base rectangle with transparent fill
  statebins:::geom_rrect(aes(xmin = 0, xmax = protein_length,
                ymin = -0.25, ymax = 0.25),
            fill = "transparent", color = "black", size=0.4) +
            
  #add mutation labels
  geom_text_repel(data = mutations_combined,
                aes(x = position, y = frequency,
                    label = mutations),
                size = 3,
                color = "black",
                angle = 90,
                force = 0.1,         
                force_pull = 0.1,    
                point.padding = 0.5,
                box.padding = 0.3,
                min.segment.length = 0,
                segment.color = "grey80",
                segment.alpha = 0.5,
                segment.size = 0.25,
                nudge_y = ifelse(mutations_combined$frequency < 1, -1, 0.5),
                max.overlaps = Inf,
                max.iter = 3000)+
  
  #add domain labels
  geom_text(data = color_data,
            aes(x = (start + end)/2, y = 0,
                label = "")) +
  
  #labels
  labs(
    x = "Amino acid position", y = "Number of mutations", color = "Type of mutation", fill= "Domain", title = "DDX3X"
  ) +
  
  #customize theme
  theme_minimal() +
  theme(panel.grid = element_blank(),
        axis.text = element_text(size = 8, color = "black"),
        axis.title.y = element_text(size = 8, hjust = 0.45), axis.title.x = element_text(size = 8),
        axis.ticks.x = element_line(color = "black", size = 0.5),  # Add x-axis ticks
        axis.ticks.y = element_line(color = "black", size = 0.5),  # Add y-axis ticks
        axis.ticks.length = unit(5, "pt"),  # Set the length of tick marks
        legend.position = "right", 
        legend.title = element_text(size = 8, face="bold"), 
        legend.key.size = unit(0.5, 'cm'),
        plot.title = element_text(hjust=0.5, size=8, face="bold")
        ) +

  #scale adjustments
  scale_y_continuous(
    expand = c(0, 0), limits = c(-9, 3), breaks = (seq(-6, -1, 1)), labels = c(6,5,4,3,2,1))+
  scale_x_continuous(limits = (c(-20,680)), breaks = c(1,100,200,300,400,500,600,662), expand = (c(0,0))) +
  geom_segment(aes(x = 1, y = -9, xend = 662, yend = -9), color = "black", size = 0.75)+
  geom_segment(aes(x = -20, y = -6.015, xend = -20, yend = -0.985), color = "black", size = 0.75)+
  scale_color_manual(values=mycolors_type)+
  scale_fill_manual(values=mycolors_domain)+
  guides(fill = guide_legend(override.aes = list(colour = NA)))
  
Fig5e <- lollipop + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
Fig5e

8.2 RNA-seq

## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 740 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## 
## out of 15127 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 13, 0.086%
## LFC < 0 (down)     : 4, 0.026%
## outliers [1]       : 73, 0.48%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 17
## [1] "Intercept"                          "condition_cps_unmutated_vs_mutated"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## log2 fold change (MAP): condition cps unmutated vs mutated 
## Wald test p-value: condition cps unmutated vs mutated 
## DataFrame with 15127 rows and 5 columns
##                   baseMean log2FoldChange     lfcSE     pvalue      padj
##                  <numeric>      <numeric> <numeric>  <numeric> <numeric>
## ENSG00000000003     1.9101     0.00289367 0.0846446 0.63193053  0.903934
## ENSG00000000419  2258.7670     0.36187463 0.3585549 0.00449449  0.393372
## ENSG00000000457  1022.1373    -0.02142053 0.0837562 0.39599811  0.805297
## ENSG00000000460  1218.4487    -0.00499457 0.0824318 0.79740023  0.957569
## ENSG00000000938 29787.4417    -0.01781967 0.0846137 0.39643908  0.805324
## ...                    ...            ...       ...        ...       ...
## ENSG00000273045   62.32308   -0.000905656 0.0817699   0.962356  0.994221
## ENSG00000273079   19.23316    0.004005899 0.0849180   0.110130  0.603490
## ENSG00000273155    2.72231    0.002674422 0.0842020   0.799143  0.957902
## ENSG00000273173   98.67154   -0.009399929 0.0833434   0.594818  0.892583
## ENSG00000273294    8.97583    0.008131371 0.0835206   0.641268  0.908161
##                             pathway     pval     padj log2err    ES   NES  size
##                              <char>    <num>    <num>   <num> <num> <num> <int>
## 1: HALLMARK_TNFA_SIGNALING_VIA_NFKB 5.41e-14 2.70e-12   0.965 0.523  2.25   188
## 2:          HALLMARK_MYC_TARGETS_V1 2.48e-13 6.20e-12   0.933 0.501  2.18   199
## 3:          HALLMARK_G2M_CHECKPOINT 6.21e-08 1.04e-06   0.705 0.438  1.91   196
## 4:   HALLMARK_INFLAMMATORY_RESPONSE 1.05e-06 1.31e-05   0.644 0.430  1.84   171
## 5:             HALLMARK_E2F_TARGETS 1.56e-06 1.56e-05   0.644 0.413  1.80   199
## 6:       HALLMARK_KRAS_SIGNALING_UP 2.57e-06 2.15e-05   0.627 0.432  1.84   156
##     leadingEdge
##          <list>
## 1: BTG3, G0....
## 2: MCM2, SN....
## 3: TOP2A, M....
## 4: NLRP3, C....
## 5: ATAD2, C....
## 6: G0S2, MA....
## New names:
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(ucll_knis)
## 
##   # Now:
##   data %>% select(all_of(ucll_knis))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 2321 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## 
## out of 16735 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 45, 0.27%
## LFC < 0 (down)     : 13, 0.078%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 58
## [1] "Intercept"                           "condition_knis_unmutated_vs_mutated"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## log2 fold change (MAP): condition knis unmutated vs mutated 
## Wald test p-value: condition knis unmutated vs mutated 
## DataFrame with 16735 rows and 5 columns
##                   baseMean log2FoldChange      lfcSE    pvalue      padj
##                  <numeric>      <numeric>  <numeric> <numeric> <numeric>
## ENSG00000186092    1.67207    8.91947e-06 0.00144270  0.595549  0.998109
## ENSG00000187634  267.57342    1.97106e-06 0.00144264  0.849780  0.998109
## ENSG00000188976 3313.36386    7.83263e-06 0.00144255  0.380360  0.998109
## ENSG00000187961  311.10937    2.16735e-06 0.00144268  0.482642  0.998109
## ENSG00000187583   27.53665    7.18020e-07 0.00144268  0.803700  0.998109
## ...                    ...            ...        ...       ...       ...
## ENSG00000212907    90027.8    5.35529e-06 0.00144268  0.191985  0.969190
## ENSG00000198886   506468.1    5.87490e-06 0.00144266  0.284578  0.998109
## ENSG00000198786   285982.6    3.94295e-06 0.00144267  0.418751  0.998109
## ENSG00000198695    77486.4    2.17944e-06 0.00144266  0.545266  0.998109
## ENSG00000198727   341661.0    3.13512e-06 0.00144266  0.525181  0.998109
##                                       pathway     pval     padj log2err    ES
##                                        <char>    <num>    <num>   <num> <num>
## 1:               HALLMARK_ALLOGRAFT_REJECTION 1.07e-10 5.35e-09   0.839 0.486
## 2:             HALLMARK_INFLAMMATORY_RESPONSE 2.28e-10 5.70e-09   0.827 0.474
## 3:           HALLMARK_TNFA_SIGNALING_VIA_NFKB 2.12e-07 3.53e-06   0.690 0.425
## 4: HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION 6.27e-07 7.84e-06   0.659 0.414
## 5:                 HALLMARK_KRAS_SIGNALING_UP 1.74e-06 1.66e-05   0.644 0.420
## 6:                       HALLMARK_COAGULATION 1.99e-06 1.66e-05   0.627 0.458
##      NES  size  leadingEdge
##    <num> <int>       <list>
## 1:  2.20   177 CD96, NL....
## 2:  2.16   191 CD14, NL....
## 3:  1.94   191 CCL20, S....
## 4:  1.88   190 THBS1, S....
## 5:  1.90   178 NAP1L2, ....
## 6:  1.98   126 THBS1, S....

8.2.1 Hallmark gene sets

#prepare data for plot

#mark top common gene sets
top10_common <- intersect(fgsea_hallmark$pathway[1:10], 
                   fgsea_hallmark_knis$pathway[1:10])

#plot CPS data
hallmark_plot <- fgsea_hallmark |> 
  arrange(pval) |> 
  slice_head(n=10) |> 
  mutate(direction = as.character(ifelse(sign(NES) <0, "Up", "Down")),
         is_top10 = pathway %in% top10_common,
         pathway = as.character(pathway),
         pathway_formatted = as.character(ifelse(is_top10, paste0("**", pathway, "**"), pathway)))

hallmark_plot$pathway_formatted <- factor(hallmark_plot$pathway_formatted, 
                                          levels = sort_by(unique(hallmark_plot$pathway_formatted), 
                                                                   rev(hallmark_plot$pval)))


hallmark <- hallmark_plot |> 
  ggplot(aes(x=-log10(pval), y=pathway_formatted, fill=direction)) +
  geom_col(alpha=0.75) +
  labs(
    y = "",
    x = expression(-log[10]*italic(P)),
    fill = "Direction",
    title = "Hallmark"
  ) +
  theme_figures_border+
  scale_y_discrete(expand = c(0.075,0.01))+
  scale_x_continuous(expand = c(0.025,0))+
  scale_fill_manual(values="lightcoral")

Fig5f <- hallmark +
  theme(axis.text.y = element_markdown(color="black", size=8),
        plot.margin = margin(t=2, l=1, b=1, r=1, unit = "cm"))
Fig5f

8.2.2 Volcano plot

#prepare data for plot

#combine DEres
DEres_comb <- cbind(rbind(DEres_cps, DEres_knis), 
                            source=c(rep("CPS1000", nrow(DEres_cps)), rep("Knisbacher",nrow(DEres_knis))))

#define significant genes
DEres_comb$sign <- ifelse((DEres_comb$padj < 0.1 & 
                                 abs(DEres_comb$log2FoldChange) > 2), TRUE, FALSE)
DEres_comb$source_sign <- ifelse(DEres_comb$sign == TRUE, DEres_comb$source, "non_sign")

#prepare annotation
colors_volcano <- setNames(c("lightgrey", "red", "lightblue"), unique(DEres_comb$source_sig))

#annotate significant genes in top hallmark genesets and map to genes
sets_hallmark <- msigdbr(species = "Homo sapiens", collection = "H")

gene_to_hallmark <- sets_hallmark |> 
  group_by(gene_symbol) |> 
  summarize(hallmark_pathways = paste(gs_name, collapse = "; ")) |> 
  ungroup()

DEres_comb <- DEres_comb |> 
  left_join(gene_to_hallmark, by = c("external_gene_name" = "gene_symbol"))

#plot
set.seed(123)
volcano <- DEres_comb |> 
  ggplot(aes(x = log2FoldChange, y = -log10(pvalue))) +
  geom_point(aes(color=source_sign), size=1, alpha = 0.7) +
  geom_label_repel(
    aes(label = ifelse(
      (str_detect(hallmark_pathways, 
                  "HALLMARK_TNFA_SIGNALING_VIA_NFKB|HALLMARK_INFLAMMATORY_RESPONSE")) & 
        source_sign != "non_sign", 
      external_gene_name,
      NA
    ), fill=source_sign, segment.color=source_sign),
    color="black",
    box.padding = 0.4, 
    max.overlaps = Inf,
    size=3,
    point.padding = 0.2,
    label.size = 0.25,
    min.segment.length = 0
  )+
  theme_figures+
  labs(
    title = "",
    x = expression(log[2]~FC),
    y = expression(-log[10]*italic(P)),
    color = "Dataset"
  ) +
  scale_color_manual(values = colors_volcano, 
                     labels=c("CPS1000", "CLL-map Portal", "not significant"))+
  scale_fill_manual(values = colors_volcano, 
                    labels=c("CPS1000", "CLL-map Portal", "not significant"),
                    guide = "none")+
  scale_discrete_manual("segment.color", values = colors_volcano, guide = "none")+
  scale_x_continuous(limits=c(-10,10))+
  annotate("text", x=5, y=8, label = "Down in DDX3X\nmutants", size = 3)+
  annotate("text", x=-5, y=8, label = "Up in DDX3X\nmutants", size = 3) +
  guides(color = guide_legend(override.aes = list(size = 2)))

Fig5g <- volcano + theme(plot.margin = margin(t=1.5, l=1, b=1.5, unit = "cm"))
Fig5g
## Warning: Removed 31720 rows containing missing values or values outside the scale range
## (`geom_label_repel()`).

#multipanel Fig.5

Fig5b <- Fig5b_top/Fig5b_bottom + plot_layout(ncol=1, guides = 'collect') & theme(legend.position='right', 
                                                                                  plot.margin=margin(t=0.5, b=0.5, l=0.5, unit="cm"))

col1 <- plot_grid(Fig5a, Fig5c, Fig5d, Fig5e,
                  align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.2, 0.12, 0.25, 0.23), 
                   labels=c("A", "C", "D", "E"), label_size=14)

col2 <- plot_grid(Fig5b, Fig5f, Fig5g, NULL, 
                  align = "v", axis = c("r"),
                  ncol = 1, rel_heights = c(0.5, 0.2, 0.25, 0.05), 
                  labels=c("B", "F", "G", ""), label_size=14)
## Warning: Removed 31720 rows containing missing values or values outside the scale range
## (`geom_label_repel()`).
Fig5 <- plot_grid(col1, col2, ncol=2, rel_widths = c(0.6, 0.4)) & theme(plot.margin=margin(l=1,t=1,b=1,r=1, unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.svg", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 55 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.pdf", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 56 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.png", width=42, height=59.4, units = "cm", dpi=600) #remove later
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 55 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

8.2.3 Heatmaps

8.2.4 Genes of interest: NLRP3 and DDX3Y

9 Longitudinal analysis

9.1 Global distribution

#prepare data, use all samples, limit to CLL
treatments_cll <- treatments |> 
  filter(patientID %in% screen_long[screen_long$diagnosis == "CLL",]$patientID)

#subtract suffixes from sampleID
treatments_cll$sample1 <- gsub("-[1234]$", "", treatments_cll$sample1)
treatments_cll$sample2 <- gsub("-[1234]$", "", treatments_cll$sample2)

# merge tables, limit to CLL samples
merged <- left_join(samples, screen_allSamples[, !colnames(screen_allSamples) %in% "patientID"], by = "sampleID") |> 
  filter(patientID %in% screen_long[screen_long$diagnosis == "CLL",]$patientID)

#add date of sampling
#subtract suffixes from sampleID
merged$sampleID <- gsub("-[1234]$", "", merged$sampleID)

#verify sampleID is unique
length(unique(merged$sampleID)) == nrow(merged)
## [1] TRUE
merged_date <- left_join(merged, survival[, colnames(survival) %in% c("sampleID", "sampleDate")], by = "sampleID") |> 
  relocate(sampleDate, .after = sampleID)

#check NA
sum(is.na(merged_date$sampleDate))
## [1] 0
#transfrom in long data frame
merged_table_long <- merged_date |>
  pivot_longer(cols = c(4:length(merged_date)), names_to = "drug", values_to = "viability")

#add concentration level and remove last two characters from each string in 'drug' column
merged_table_long <- merged_table_long |> 
  mutate(conc_index = as.numeric(str_extract(drug, "\\d+$")),
         drug = str_replace(drug, "_\\d+$", ""))

#create means
merged_table_means <- merged_table_long |> 
  group_by(patientID, sampleID, sampleDate, drug) |> 
  summarize(mean_viab=mean(viability))
## `summarise()` has grouped output by 'patientID', 'sampleID', 'sampleDate'. You
## can override using the `.groups` argument.
#convert to wide format
longitudinal_wide <- merged_table_means |> 
  pivot_wider(names_from = drug, values_from = mean_viab)

#calculate pair-wise viability differences
differences_long <- create_mean_differences(treatments, longitudinal_wide)

#summary of sample pairs with treatment annotation and ex vivo viability data
summary_table <- differences_long |> 
  group_by(therapy) |> 
  summarize(sample_pairs = n()/63,
            patients = n_distinct(patientID),
            unique_samples = n_distinct(c(sample1, sample2)),
            unique_sample1 = n_distinct(sample1),
            unique_sample2 = n_distinct(sample2)) |> 
  bind_rows(differences_long |> 
              summarise(therapy = "overall",
                        sample_pairs = n()/63,
                        patients = n_distinct(patientID),
                        unique_samples = n_distinct(c(sample1, sample2)),
                        unique_sample1 = n_distinct(sample1),
                        unique_sample2 = n_distinct(sample2)))
summary_table
## # A tibble: 4 × 6
##   therapy  sample_pairs patients unique_samples unique_sample1 unique_sample2
##   <chr>           <dbl>    <int>          <int>          <int>          <int>
## 1 chemo              47       40             87             41             46
## 2 none               65       65            130             65             65
## 3 targeted           20       19             39             19             20
## 4 overall           132      110            242            111            131
#date intervals of samples
df_date <- differences_long |> 
  filter(drug == "Ibrutinib") |> 
  dplyr::select("patientID", "pair", "sample1", "sample2") |> 
  merge(merged_date[, colnames(merged_date) %in% c("sampleID", "sampleDate")], by.x = "sample1", by.y ="sampleID") |> 
  rename(sampleDate1 = sampleDate)

df_date <- df_date |> 
  rename(sampleID = sample2) |> 
  left_join(merged_date[, colnames(merged_date) %in% c("sampleID", "sampleDate")], by ="sampleID") |> 
  rename(sampleDate2 = sampleDate, sample2 = sampleID) |> 
  mutate(diff_date = sampleDate2 - sampleDate1)

df_date$diff_date <- as.numeric(df_date$diff_date)
summary(df_date$diff_date)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       7     224     515     580     892    1757
#sample pairs with more than 0.1 change in viability
differences_long |> 
  filter(abs(difference) > 0.1) |> 
  nrow()/nrow(differences_long)*100
## [1] 4.85
#plot
long_distr <- differences_long |> 
  ggplot(aes(difference))+
    geom_histogram(color="white", fill="grey40", alpha=0.5,bins = 50)+
    geom_vline(xintercept = 0, linetype = "dashed", color = "black")+
  labs(x="Difference in mean viability", y="Count")+
  theme_figures+
  scale_y_continuous(expand = c(0.02,0))+
  scale_x_continuous(expand = c(0.02,0), limits=c(-0.3, 0.3))

Fig6a <- long_distr + theme(plot.margin = margin(t=1, b=1, l=1, unit = "cm"))
Fig6a
## Warning: Removed 129 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

9.2 Illustration of treatment contexts

## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `timepoint = as.numeric(timepoint)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## Warning in geom_segment(aes(x = 0.5, y = 6, xend = 3, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 6, xend = 0.5, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 5, xend = 3, yend = 5), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.8, y = 4, xend = 3, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.2, y = 4, xend = 1.8, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 4, xend = 1.2, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.5, y = 3, xend = 3, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 3, xend = 1.5, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0.8, y = 2, xend = 3, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 2, xend = 0.8, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.5, y = 1, xend = 3, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 1, xend = 1.5, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1, y = 0.5, xend = 1, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 2, y = 0.5, xend = 2, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

9.3 Longitudinal drug response evoluation at the single concentration level

#use individual drug responses
merged_table_wide <- merged_table_long |> 
  pivot_wider(names_from = drug, values_from = viability)

#calculate pair-wise viability differences
differences_long_allconc <- create_conc_differences(treatments, merged_table_wide)
## Warning in left_join(., viab_df, by = c(sample1 = "sampleID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 71 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
## Warning in left_join(., viab_df, by = c(sample2 = "sampleID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 11 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
#add concentrations
differences_long_allconc <- differences_long_allconc |> 
  left_join(screen_long |> 
              distinct(drug, conc_index, concentration), by = c("drug", "conc_index"))  |> 
  drop_na(conc_index)

#t-test with FDR within status and therapy
df_ttest_allconc <- differences_long_allconc |> 
  group_by(drug, conc_index, status, therapy) |> 
  mutate(pval = t.test(sample1_value, sample2_value, paired = TRUE)$p.value) |> 
  ungroup() |>
  group_by(status, therapy) |>
  mutate(padj = p.adjust(pval, method="BH"))

#order drugs by their association similarity (hierarchical clustering)
pMat <- df_ttest_allconc |> 
  filter(padj < 0.1) |>
  group_by(drug, conc_index, therapy, status) |> 
  mutate(sign_log_pval = -log10(mean(pval)) * sign(mean(percent_change))) |>
  ungroup() |> 
  dplyr::select(drug, conc_index, therapy, status, sign_log_pval) |>
  # Create a unique identifier for each condition
  unite("condition", therapy, status, conc_index, sep = "_") |> 
  pivot_wider(names_from = condition, 
              values_from = sign_log_pval,
              values_fn = median) |>
  as.data.frame() |>
  column_to_rownames("drug") |> 
  as.matrix()

#replace NAs with 0 (no significant effect)
pMat[is.na(pMat)] <- 0

#perform hierarchical clustering
set.seed(123)
drug_clusters <- hclust(dist(pMat), method = "ward.D2")
drug_order <- rownames(pMat)[drug_clusters$order]

df_ttest_allconc$drug <- factor(df_ttest_allconc$drug, levels = drug_order)

#rename variables
df_ttest_allconc$therapy <- recode(df_ttest_allconc$therapy,
                                   "chemo" = "Chemo-Immuno",
                                   "targeted" = "Targeted", 
                                   "none" = "None")

#plot heatmap
pval_hmap <- df_ttest_allconc |> 
  filter(padj <0.1) |>
  group_by(drug, conc_index, therapy, status) |> 
  mutate(sign_log_pval = -log10(mean(pval)) * sign(mean(percent_change))) |>  
  ggplot(aes(x=conc_index, y=drug, fill=sign_log_pval))+
  geom_tile(size = 0.2, color = "white")+
  facet_wrap(therapy~status, ncol=3)+
  scale_fill_gradient2(low="blue", high="red", limits=c(-4, 4))+
  labs(y="", x="Concentration index", fill = "**-log<sub>10</sub>*P*<br>with direction**") +
  theme_figures_facet_angle45_x_legend+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0),
        legend.title = ggtext::element_markdown())

Fig6c <- pval_hmap + theme(plot.margin = margin(l=1, t=1, b=0.5, unit = "cm"))
Fig6c

9.4 Longitudinal changes in patients treated with targeted therapies

#create list of sign. drugs in targeted, start between
targ_list <- df_ttest_allconc |> 
  filter(padj <0.1, therapy == "Targeted", status == "start between") |> 
  group_by(drug) |> 
  mutate(median_diff = median(difference)) |> 
  arrange(median_diff)

targ_list$drug <- factor(targ_list$drug, levels = unique(targ_list$drug))

#use differences in mean viability
median_ranking_targeted <- differences_long |> 
  filter(drug %in% unique(targ_list$drug), therapy == "targeted", status == "start between") |> 
  group_by(drug) |> 
  mutate(median_diff = median(difference)) |> 
  arrange(median_diff)

median_ranking_order <- unique(median_ranking_targeted$drug)

#prepare pathway labeling
pathway_annotation <- differences_long |> 
  filter(drug %in% unique(targ_list$drug), 
         therapy == "targeted", 
         status == "start between") |> 
  mutate(drug = factor(drug, levels = median_ranking_order)) |>
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug")

median_ranking_order <- unique(median_ranking_targeted$drug)
pathway_annotation$Pathway_mod2 <- factor(pathway_annotation$Pathway_mod2, levels = order_pway2)

#plot
long_mean_viab <- pathway_annotation |>
  ggplot(aes(y = difference, x = drug, fill=Pathway_mod2)) + 
  geom_boxplot(alpha = 0.8, outliers = TRUE) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed", linewidth =0.5) +
  scale_fill_manual(values=colors_pathways_mod2)+
  theme_figures_angle45_x+
  labs(fill = "Pathway", y = "Difference in mean viability", x = "")

Fig6d <- long_mean_viab + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig6d

9.4.1 Drugs of interest

#plot OTX015 and TW-37
otx_tw <- differences_long_allconc |> 
  mutate(drug = case_when(drug == "OTX015" ~ "OTX015 (BET)",
                          drug == "TW-37" ~ "TW-37 (BCL2/MCL1)",
                          TRUE ~ drug)) |> 
  pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |> 
  filter(drug %in% c("OTX015 (BET)","TW-37 (BCL2/MCL1)"), status == "start between", therapy == "targeted") |> 
  ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
  geom_boxplot(alpha = 0.3, width = 0.5, position=position_dodge(0.75), outlier.shape = NA)+
  geom_point(aes(color = timepoint), alpha = 0.25, 
           position = position_jitterdodge(dodge.width = 0.8, jitter.width = 0.2))+ 
  labs(y = "Viability", color= "Sampling")+ 
  facet_wrap(~drug, ncol=2) + 
  stat_compare_means(aes(group = timepoint), 
                     method = "t.test",
                     paired = TRUE,
                     label = "p.signif",
                     label.y = 1.2, 
                     size=3)+
  labs(x = bquote("Concentration ("*mu*"M)"))+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.38)) +
  scale_color_manual(labels = c("sample1_value" = "Before treatment", 
                                  "sample2_value" = "On treatment"),
                       values = colors_treatment)+
  scale_x_discrete(labels = c("0.016", "0.08", "0.4", "2", "10"))+
  guides(color = guide_legend(override.aes = list(label = c(""))))+
  theme_figures_facet_angle45_x_legend

Fig6e <- otx_tw + theme(plot.margin = margin(l=1, t=1, unit = "cm"))
Fig6e

#plot BCR inhibitors
bcr <- differences_long_allconc |> 
  pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |> 
  filter(drug %in% c("Dasatinib","Ganetespib", "Idelalisib", "AZD8055", "Cobimetinib", "Ibrutinib"), 
         status == "start between", therapy == "targeted") |> 
  mutate(drug = case_when(drug == "AZD8055" ~ "AZD8055 (mTOR)",
                          drug == "Cobimetinib" ~ "Cobimetinib (MEK)",
                          drug == "Dasatinib" ~ "Dasatinib (ABL/LYN)",
                          drug == "Idelalisib" ~ "Idelalisib (PI3K)",
                          drug == "Ganetespib" ~ "Ganetespib (HSP90)",
                          drug == "Ibrutinib" ~ "Ibrutinib (BTK)",
                          TRUE ~ drug)) |> 
  ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(alpha = 0.3, position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.1)) + 
  theme_figures_facet_angle45_x_legend+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.2)) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  facet_wrap(~drug)+
  stat_compare_means(aes(group = timepoint), 
                     method = "t.test",
                     paired = TRUE,
                     label = "p.signif",
                     label.y = 1.1, 
                     size=3)+
  labs(y = "Viability", color= "Sampling", x = bquote("Concentration ("*mu*"M)"))+
  scale_color_manual(labels = c("sample1_value" = "Before treatment", 
                                  "sample2_value" = "On treatment"),
                       values = colors_treatment)+
  scale_x_discrete(labels = c("0.016", "0.08", "0.4", "2", "10"))+
  guides(color = guide_legend(override.aes = list(label = c(""))))

SFig12c <- bcr + theme(plot.margin = margin(t=1, l=1, unit = "cm"))


#plot MCL1 and BCL2 inhibitors
mcl_bcl <- differences_long_allconc |> 
  pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |> 
  filter(drug %in% c("Venetoclax","A.1210477"), status == "start between", therapy == "targeted") |> 
  mutate(drug = case_when(drug == "Venetoclax" ~ "Venetoclax (BCL2)",
                          drug == "A.1210477" ~ "A.1210477 (MCL1)",
                          TRUE ~ drug)) |> 
  ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(alpha = 0.3, position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.1)) + 
  theme_figures_facet_angle45_x_legend+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.2)) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  facet_wrap(~drug, nrow=2)+
  stat_compare_means(aes(group = timepoint), 
                     method = "t.test",
                     paired = TRUE,
                     label = "p.signif",
                     label.y = 1.1, 
                     size=3)+
  labs(y = "Viability", color= "Sampling", x = bquote("Concentration ("*mu*"M)"))+
  scale_color_manual(labels = c("sample1_value" = "Before treatment", 
                                  "sample2_value" = "On treatment"),
                       values = colors_treatment)+
  scale_x_discrete(labels = c("0.016", "0.08", "0.4", "2", "10"))+
  guides(color = guide_legend(override.aes = list(label = c(""))))

SFig12d <- mcl_bcl + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
#multipanel

col1 <- plot_grid(Fig6a, Fig6b, 
                  align = "v", axis = c("r"), 
                  ncol = 1, rel_heights = c(0.4, 0.4), 
                  labels=c("A", "B"), label_size=14)
## Warning: Removed 129 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning in geom_segment(aes(x = 0.5, y = 6, xend = 3, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 6, xend = 0.5, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 5, xend = 3, yend = 5), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.8, y = 4, xend = 3, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.2, y = 4, xend = 1.8, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 4, xend = 1.2, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.5, y = 3, xend = 3, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 3, xend = 1.5, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0.8, y = 2, xend = 3, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 2, xend = 0.8, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.5, y = 1, xend = 3, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 1, xend = 1.5, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1, y = 0.5, xend = 1, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 2, y = 0.5, xend = 2, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
col2 <- plot_grid(Fig6c, ncol = 1, 
                  #rel_heights = c(0.8, 0.2),
                  labels=c("C", ""), label_size=14)

col3 <- plot_grid(Fig6d,Fig6e, ncol = 1, align = "v", axis = c("l", "r"),
                  rel_heights = c(0.55, 0.45), 
                  labels=c("D", "E"), label_size=14)

top <- plot_grid(col1, col2, col3, ncol=3, 
                 align = "h", axis = c("t", "b"),
                 rel_widths = c(0.2, 0.35, 0.5))

Fig6 <- plot_grid(top, NULL, nrow =2, rel_heights = c(0.25, 0.75)) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.png", width=42, height=59.4, units = "cm", dpi=600)

10 Correlation of drug response metrics with clinical outcomes

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `crr = as.numeric(crr)`.
## Caused by warning:
## ! NAs introduced by coercion
## `summarise()` has grouped output by 'patientID', 'drug'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'diagnosis', 'patientID'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#multipanel
Fig7a <- p1+p2 + plot_annotation("CLL", theme = theme(
  plot.margin = margin(t=1, b=1, l=1, unit = "cm"),
  plot.title = element_text(hjust=0.05, size=8, face="bold"))) +
  plot_layout(guides = "collect")
Fig7a
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Fig7b <- p3+p4 + plot_annotation("AML", theme = theme(
  plot.margin = margin(t=1, b=1, l=1, unit = "cm"),
  plot.title = element_text(hjust=0.05, size=8, face="bold"))) +
  plot_layout(guides = "collect")
Fig7b
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`sta_corr()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

10.1 Illustration of drug-response metrics

## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"], : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"] - : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

10.2 Correlation coefficients for AML and CLL

#create boxplots for correlations coefficients of drug metrics in AML and CLL
#function to calculate correlations
compute_cor <- function(data, parameters, outcome, diagnosis) {
  cor_data <- sapply(parameters, function(par) {
    cor(data[[outcome]], data[[par]], use = "pairwise.complete.obs", method = "pearson")
  })
  
  data.frame(
    parameter = parameters,
    cor = cor_data,
    diagnosis = diagnosis,
    outcome = outcome
  )
}

parm<- c("HS_mean", "Einf_mean", "pIC50_mean", "R2_mean", "AUC_mean", "viab_mean")

CLL_ORR <- compute_cor(CLL_outcomes, parm, "orr", "CLL")
CLL_CRR <- compute_cor(CLL_outcomes, parm, "crr",  "CLL")

AML_ORR <- compute_cor(AML_outcomes, parm, "orr", "AML")
AML_CRR <- compute_cor(AML_outcomes, parm, "crr",  "AML")

cor_combined <- bind_rows(CLL_ORR, CLL_CRR, AML_ORR, AML_CRR)

#plot
par_list <- cor_combined |> 
  filter(parameter != "pF_mean", diagnosis == "CLL", outcome == "crr") |> 
  arrange(abs(cor)) |> 
  pull(parameter)
  
cor_combined$parameter <- factor(cor_combined$parameter, levels = par_list)  

Fig7d <- cor_combined |> 
filter(str_detect(parameter, "mean")) |> 
  mutate(direction = ifelse(sign(cor) >0, "positive", "negative")) |> 
  filter(parameter != "pF_mean") |> 
  ggplot(aes(y=parameter, x=abs(cor), fill=diagnosis))+
  geom_col(aes(alpha=outcome), position = position_dodge(width=0.8), width=0.8)+
  facet_wrap(~diagnosis, ncol=2)+
  labs(title="", y="Parameter (mean)", x="Absolute Pearson correlation coefficient",
  fill = "Diagnosis", alpha="Outcome")+
  theme_figures_facet_angle60_x+
  scale_fill_manual(values=colors_diag, guide="none")+
  scale_alpha_manual(values=c(1, 0.5), labels = c("CRR", "ORR"))+
  xlim(0,1)+
  scale_y_discrete(labels = c("Hill coefficient", 
                              expression(pIC[50]),  # subscript
                              expression(E[inf]),   # subscript
                              expression(R^2),      # superscript
                              "AUC",
                              "Mean viability"))
  
Fig7d <- Fig7d + theme(plot.margin = margin(t=1, b=1, l=1, unit = "cm"))
Fig7d

10.3 Time-to-event

#prepare data: use mean viability in CLL
screen_means_cll <- screen_means |> 
  filter(diagnosis == "CLL")

#merge with survival data, add age
surv_combined <- left_join(
  screen_means_cll,
  screen_long |> distinct(patientID, sampleID),  # Remove duplicates
  by = "patientID"
) |> 
  left_join(survival |> rename(patientID = patID), 
            by = c("sampleID", "patientID")) |> 
  left_join(age |> dplyr::select(-sampleDate), 
            by = c("sampleID", "patientID"))

#merge with genomic aberrations
surv_combined <- left_join(surv_combined, metadata |> 
                             dplyr::select(-diagnosis, -treatment), by = c("patientID" = "Patient.ID")) |> 
  mutate(IGHV.status = case_when(IGHV.status == "M" ~ "1",
                                 IGHV.status == "U" ~ "0",
                                 TRUE ~ IGHV.status))

#summary
length(unique(surv_combined$patientID))
## [1] 275
surv_combined |> 
  group_by(treatedAfter) |> 
  summarize(n=n()/63)
## # A tibble: 3 × 2
##   treatedAfter     n
##   <lgl>        <dbl>
## 1 FALSE          145
## 2 TRUE            99
## 3 NA              31
#perform cox regression for each drug and clinical feature

#univariate analysis for OS: drugs, stratified by IGHV
resOS_uni <- filter(surv_combined, !is.na(OS), !is.na(IGHV.status)) %>%
  group_by(drug, IGHV.status) %>%
  do(com_uni(.$mean_viab, .$OS, .$died, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "OS")

##univariate analysis for TTT: drugs, stratified by IGHV
resTTT_uni <- filter(surv_combined, !is.na(TTT), !is.na(IGHV.status)) %>%
  group_by(drug, IGHV.status) %>%
  do(com_uni(.$mean_viab, .$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "TTT")

#plot Hazard ratios and p values
plotTab_uni <- bind_rows(resOS_uni, resTTT_uni) %>%
  filter(drug %in% unique(filter(.,p.adj < 0.05)$drug))

cox_uni_strat <- plotTab_uni |> 
  mutate(IGHV.status = ifelse(IGHV.status == "1", "M-CLL", "U-CLL")) |> 
  ggplot(aes(x=drug, y = HR, col = Endpoint, dodge = Endpoint)) + 
  geom_hline(yintercept = 1, linetype = "dotted") +
  geom_point(position = position_dodge(width=0.75)) +
  geom_errorbar(position = position_dodge(width =0.75), 
                aes(ymin = lower, ymax = higher), width = 0.3) + 
  geom_text(position = position_dodge(width =0.75), size = 3,
            aes(y = higher + 0.6, 
                label = ifelse(p < 0.001, 
                               "italic(P)<0.001", 
                               paste0("italic(P)==", sprintf("%1.3f", p)))),
            parse = TRUE) +
  facet_wrap(~IGHV.status, scales = "free") + 
  labs(y="Hazard ratio", x="")+
  coord_flip() + 
  theme_figures_facet+
  scale_color_manual(values=colors_cox)+
  guides(color = guide_legend(override.aes = aes(label = "")))+
  ylim(0,4)

SFig13j <- cox_uni_strat + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
SFig13j

#multivariate analysis for OS, combine del17p and TP53
resOS <- surv_combined |> 
  mutate(TP53_del17p = ifelse(TP53==1|del17p==1, 1,0)) |> 
  filter(!is.na(OS), !is.na(IGHV.status), !is.na(TP53_del17p), !is.na(age)) %>%
  group_by(drug) %>%
  do(com(.$mean_viab, .$IGHV.status, .$TP53_del17p, .$age, .$OS, .$died, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "OS")

#multivariate analysis for TTT, combine del17p and TP53
resTTT <- surv_combined |> 
  mutate(TP53_del17p = ifelse(TP53==1|del17p==1, 1,0)) |> 
  filter(!is.na(TTT), !is.na(IGHV.status), !is.na(TP53_del17p), !is.na(age)) %>%
  group_by(drug) %>%
  do(com(.$mean_viab, .$IGHV.status, .$TP53_del17p,.$age,.$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "TTT")

#plot Hazard ratios and p values for ibrutinib
plotTab <- bind_rows(resOS, resTTT) %>%
  filter(drug %in% unique(filter(.,p.adj < 0.05)$drug))

multi_cox_ibr <- plotTab |> 
  filter(drug == "Ibrutinib") |> 
  ggplot(aes(x=variable, y = HR, col = Endpoint)) + 
  geom_hline(yintercept = 1, linetype = "dotted") +
  geom_point(position = position_dodge(width=0.75)) +
  geom_errorbar(position = position_dodge(width =0.75), 
                aes(ymin = lower, ymax = higher), width = 0.3) + 
  geom_text(position = position_dodge(width =0.75), size = 3,
            aes(y = higher + 0.6, 
                label = paste0("italic(P)==", sprintf("%1.3f", p))),
            parse = TRUE) +
  labs(y="Hazard ratio", x="")+
  coord_flip() + 
  theme_figures+
  scale_x_discrete(labels = c("TP53_del17p" = "TP53/del(17p)",
                              "response" = "Ex vivo\nresistance to\nibrutinib",
                              "ighv" = "IGHV", 
                              "age" = "Age at\nsampling"))+
  scale_color_manual(values=colors_cox)+
  guides(color = guide_legend(override.aes = aes(label = "")))+
  ylim(0,6)

Fig7e <- multi_cox_ibr + theme(plot.margin = margin(t=2, b=1, l=1, r=1, unit = "cm"))
Fig7e

#survival curves
#maxstat
km_surv <- surv_combined |> 
  filter(!is.na(IGHV.status), drug == "Ibrutinib") 

#OS
res.cut.os <- surv_cutpoint(km_surv, time = "OS", event = "died",
                         variables = c("mean_viab"))

summary(res.cut.os)
##           cutpoint statistic
## mean_viab    0.825      4.55
os_value <- summary(res.cut.os)$cutpoint
res.cat.os <- surv_categorize(res.cut.os)

fit_os <- survfit(Surv(OS, died) ~ mean_viab, data = res.cat.os)

#TTT
res.cut.ttt <- surv_cutpoint(km_surv, time = "TTT", event = "treatment",
                         variables = c("mean_viab"))

summary(res.cut.ttt)
##           cutpoint statistic
## mean_viab    0.895      6.15
ttt_value <- summary(res.cut.ttt)$cutpoint
res.cat.ttt <- surv_categorize(res.cut.ttt)

fit_ttt <- survfit(Surv(TTT, treatment) ~ mean_viab, data = res.cat.ttt)

#plot OS Kaplan-meier curve based on cut-off
km_surv <- km_surv |> 
  mutate(ibr_resistance_os = ifelse(mean_viab < os_value, 0, 1), 
         ibr_resistance_ttt = ifelse(mean_viab < ttt_value, 0, 1))
  
os <- survfit2(Surv(OS, died) ~ ibr_resistance_os+IGHV.status, data = km_surv)

pval <- sub("^p", "", survfit2_p(os))

km_os <- os |>
  ggsurvfit() +
  labs(x = "Years", y = "Overall survival", title = "Overall survival") + 
  add_censor_mark(size = 1, alpha = 0.5) +
  scale_ggsurvfit() +
  labs(x="Years from sampling")+
  theme_figures+
  theme(legend.position = "bottom")+
  scale_color_manual(labels=c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", 
                              "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"), 
                     values = c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"))+
  annotate(
    "text",
    x = 2,
    y = 0.05,
    label = paste0("'Log-rank'~italic(P)", pval),
    parse = TRUE,
    hjust = 1,
    vjust = 0,
    size = 3
  )+
  guides(color = guide_legend(ncol = 2))

Fig7d1 <- km_os

#plot TTT Kaplan-meier curve based on cut-off
ttt <- survfit2(Surv(TTT) ~ ibr_resistance_ttt+IGHV.status, data = km_surv)

pval_ttt <- sub("^p", "", survfit2_p(ttt))

km_ttt <- ttt |> 
  ggsurvfit() +
  labs(x = "Years", y = "Treatment-free survival", title = "Time to treatment") + 
  add_censor_mark(size = 1, alpha = 0.5) +
  scale_ggsurvfit() +
  labs(x="Years from sampling")+
  theme_figures+
  theme(legend.position = "bottom")+
  scale_color_manual(labels=c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", 
                              "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"), 
                     values = c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"))+
  annotate(
    "text",
    x = 2,
    y = 0.05,
    label = paste0("'Log-rank'~italic(P)", pval_ttt),
    parse = TRUE,
    hjust = 1,
    vjust = 0,
    size = 3
  )+
  guides(color = guide_legend(ncol = 2))

Fig7d2 <- km_ttt

#combine
Fig7f <- (Fig7d1/Fig7d2) + plot_layout(ncol=1, guides = 'collect') & theme(legend.position='bottom', 
                                                                           plot.margin = margin(t=1, b=1, l=1, r=1, unit = "cm"))
Fig7f

#multipanel
col1row3 <- plot_grid(Fig7c, Fig7d, NULL,
                            align = "h", axis = c("t", "b"), ncol = 3, rel_widths = c(0.4, 0.5, 0.1), 
                            labels=c("", "D", ""), label_size=14)
## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"], : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"] - : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
col1 <- plot_grid(Fig7a, Fig7b, col1row3, NULL,
                  align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.2, 0.2, 0.15, 0.45), 
                  labels=c("A", "B", "C"), label_size=14)
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`sta_corr()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
col2 <- plot_grid(Fig7e, Fig7f, NULL,
                  align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.15, 0.35, 0.5), 
                  labels=c("E", "F", ""), label_size=14)

Fig7 <- plot_grid(col1, col2, ncol =2, rel_widths = c(0.65, 0.35)) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))
                                    
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.svg", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.pdf", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.png", width=42, height=59.4, units = "cm", dpi=600)
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

10.4 specific drugs

10.5 validation in EMBL2016